/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  /* For the MPI type hand back the per-rank sequential block; for the
     sequential type the matrix is its own local representation */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else {
    ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

/* Fetch one row of the matrix by delegating to the local sequential block.
   Only rows owned by this rank may be requested; the global row index is
   translated to a local one before the call. */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Companion to MatGetRow_MPIDense(): releases whatever the inner
   MatGetRow() acquired, after the same global-to-local row translation. */
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend)
                                   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return the on-diagonal m x m block of this rank as a sequential matrix.
   The block is created lazily on first call, cached on A as the composed
   object "DiagonalBlock", and shares storage with the local dense array. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases the local array starting at column rstart (no copy).
       NOTE(review): the column offset m*rstart assumes the local leading
       dimension equals m -- confirm for matrices with a custom LDA */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    /* the compose above holds a reference, so dropping the local handle is safe */
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Insert/add values: locally owned rows go straight into the sequential
   block; off-process rows are stashed for communication at assembly time
   (unless donotstash is set). Handles both row- and column-oriented input. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;   /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: values for row i are strided by m */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr =
               MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Retrieve entries (row-oriented output into v); only locally owned rows
   are supported -- requesting a remote row is an error here, not stashed. */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Leading dimension of the local dense storage, forwarded from the inner block. */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Set the leading dimension of the local storage. If the inner sequential
   block does not exist yet, create it first (CUDA-aware: matches the outer
   type) so the LDA can be applied before preallocation. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr =
           PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    /* local block is m x N: every rank stores all columns of its rows */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-write access to the local dense array; refused while a column
   sub-matrix obtained via MatDenseGetSubMatrix() is outstanding. */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-only access to the local dense array; same outstanding-submatrix guard. */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Write-only access to the local dense array; same outstanding-submatrix guard. */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Temporarily substitute a user-provided array for the local storage;
   refused while a column vector or sub-matrix view is outstanding. */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse)
                   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Undo MatDensePlaceArray(): restore the original local storage. */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Permanently replace the local storage with a user-provided array
   (ownership transfers to the matrix; same in-use guards as above). */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract the submatrix A(isrow,iscol). The column IS is gathered onto all
   ranks; the row partitioning of the result must match that of A (no
   parallel redistribution is performed -- see the note in the body). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is)
                           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* column icol[i] of the source; gather the selected (local) rows.
       NOTE(review): assumes the destination local array is contiguous
       (LDA == nrows) since bv is advanced linearly -- confirm */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

/* Release read-write array access obtained with MatDenseGetArray(). */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release read-only array access obtained with MatDenseGetArrayRead(). */
PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release write-only array access obtained with MatDenseGetArrayWrite(). */
PetscErrorCode
               MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Begin assembly: start scattering stashed off-process entries to their
   owners. A no-op when stashing is disabled or no off-process entries exist. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash, insert the received entries locally
   (batched per row), then assemble the inner sequential block and set up
   the scatter context used by MatMult on first final assembly. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode ==
                                      MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zero every entry by forwarding to the local sequential block. */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Zero the given (global) rows; optionally fix the rhs b = diag*x on those
   rows, and place diag on the diagonal. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): MatDiagonalSet with INSERT_VALUES overwrites the
       diagonal entry of EVERY row, not just the zeroed rows -- confirm
       this is the intended MatZeroRows semantics for dense matrices */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

/* y = A*x: broadcast the full x into the local work vector lvec via the
   PetscSF (memtype-aware, works for host and device arrays), then apply
   the local sequential multiply. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;
  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x: same gather of x into lvec as MatMult, then local multadd. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A^T*x: apply the local transpose multiply into lvec, then sum the
   per-rank contributions into y with an SF reduce (memtype-aware). */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr =
         PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A^T*x: copy y into z first, then reduce the local transpose
   products into z exactly as in MatMultTranspose. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract the global diagonal into v. v is zeroed first; each rank then
   fills the entries corresponding to its own rows. Note v is expected to
   have global size A->rmap->N (checked below). */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  /* NOTE(review): radd is rstart*m (m = local rows) but the per-column
     stride used below is lda; if lda != m this offset looks inconsistent
     -- confirm whether radd should be rstart*lda */
  radd = A->rmap->rstart*m;
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  ierr =
         MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy the parallel dense matrix: release the stash, the inner
   sequential block, the communication objects (lvec/Mvctx), the cached
   column vector/matrix views, and detach every composed method. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* outstanding column-vector or sub-matrix views indicate caller error */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* remove all composed methods installed at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr =
         PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr =
         PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
/* View helper for ASCII, draw and socket viewers. Info formats are printed
   per rank; otherwise the whole matrix is gathered onto rank 0 and viewed
   through the sequential viewer (simple, not memory-scalable). */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* NOTE(review): vtype is fetched here but never referenced afterwards
       in this function -- candidate for removal */
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr =
           PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns all rows of the gathered copy; view its local block */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Top-level viewer dispatch: ASCII/socket/draw go through the gather-based
   helper above; binary has its own dense writer. Other viewer types are
   silently ignored. */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Gather matrix statistics: local numbers come from the inner block;
   MAT_GLOBAL_MAX / MAT_GLOBAL_SUM reduce them over the communicator. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Handle matrix options: most are forwarded to the inner block, a few are
   recorded locally (row orientation, stash suppression), the rest are
   documented no-ops for the dense format. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;                    /* affects MatSetValues layout */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;                     /* skip the assembly stash entirely */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* A <- diag(ll) * A * diag(rr). The left vector matches the row layout and
   scales local rows directly; the right vector is first broadcast into the
   column-sized work vector lvec via the SF. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr =
MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 823 if (ll) { 824 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 825 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 826 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 827 for (i=0; i<m; i++) { 828 x = l[i]; 829 v = vv + i; 830 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 831 } 832 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 833 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 834 } 835 if (rr) { 836 const PetscScalar *ar; 837 838 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 839 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 840 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 841 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 842 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 843 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 844 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 845 for (i=0; i<n; i++) { 846 x = r[i]; 847 v = vv + i*lda; 848 for (j=0; j<m; j++) (*v++) *= x; 849 } 850 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 851 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 852 } 853 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 858 { 859 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 860 PetscErrorCode ierr; 861 PetscInt i,j; 862 PetscMPIInt size; 863 PetscReal sum = 0.0; 864 const PetscScalar *av,*v; 865 866 PetscFunctionBegin; 867 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 868 v = av; 869 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 870 if (size == 1) { 871 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 872 } else { 873 if (type == NORM_FROBENIUS) { 874 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 875 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 876 } 
877 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 878 *nrm = PetscSqrtReal(*nrm); 879 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 880 } else if (type == NORM_1) { 881 PetscReal *tmp,*tmp2; 882 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 883 *nrm = 0.0; 884 v = av; 885 for (j=0; j<mdn->A->cmap->n; j++) { 886 for (i=0; i<mdn->A->rmap->n; i++) { 887 tmp[j] += PetscAbsScalar(*v); v++; 888 } 889 } 890 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 891 for (j=0; j<A->cmap->N; j++) { 892 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 893 } 894 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 895 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 896 } else if (type == NORM_INFINITY) { /* max row norm */ 897 PetscReal ntemp; 898 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 899 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 900 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 901 } 902 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 907 { 908 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 909 Mat B; 910 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 911 PetscErrorCode ierr; 912 PetscInt j,i,lda; 913 PetscScalar *v; 914 915 PetscFunctionBegin; 916 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 917 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 918 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 919 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 920 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 921 } else B = *matout; 922 923 m = a->A->rmap->n; n = a->A->cmap->n; 924 ierr = 
MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 925 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 926 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 927 for (i=0; i<m; i++) rwork[i] = rstart + i; 928 for (j=0; j<n; j++) { 929 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 930 v += lda; 931 } 932 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 933 ierr = PetscFree(rwork);CHKERRQ(ierr); 934 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 935 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 936 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 937 *matout = B; 938 } else { 939 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 940 } 941 PetscFunctionReturn(0); 942 } 943 944 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 945 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 946 947 PetscErrorCode MatSetUp_MPIDense(Mat A) 948 { 949 PetscErrorCode ierr; 950 951 PetscFunctionBegin; 952 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 953 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 954 if (!A->preallocated) { 955 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 956 } 957 PetscFunctionReturn(0); 958 } 959 960 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 961 { 962 PetscErrorCode ierr; 963 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 964 965 PetscFunctionBegin; 966 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatConjugate_MPIDense(Mat mat) 971 { 972 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 973 PetscErrorCode ierr; 974 975 PetscFunctionBegin; 976 ierr = MatConjugate(a->A);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 980 PetscErrorCode MatRealPart_MPIDense(Mat A) 981 { 982 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 991 { 992 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 993 PetscErrorCode ierr; 994 995 PetscFunctionBegin; 996 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 997 PetscFunctionReturn(0); 998 } 999 1000 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 1001 { 1002 PetscErrorCode ierr; 1003 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1004 1005 PetscFunctionBegin; 1006 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 1007 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 1008 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1013 1014 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1015 { 1016 PetscErrorCode ierr; 1017 PetscInt i,n; 1018 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1019 PetscReal *work; 1020 1021 PetscFunctionBegin; 1022 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1023 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1024 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1025 if (type == NORM_2) { 1026 for (i=0; i<n; i++) work[i] *= work[i]; 1027 } 1028 if (type == NORM_INFINITY) { 1029 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1030 } else { 1031 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(work);CHKERRQ(ierr); 1034 if (type == NORM_2) { 1035 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1036 } 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #if defined(PETSC_HAVE_CUDA) 1041 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1042 { 1043 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1044 
PetscErrorCode ierr; 1045 PetscInt lda; 1046 1047 PetscFunctionBegin; 1048 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1049 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1050 if (!a->cvec) { 1051 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1052 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1053 } 1054 a->vecinuse = col + 1; 1055 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1056 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1057 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1058 *v = a->cvec; 1059 PetscFunctionReturn(0); 1060 } 1061 1062 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1063 { 1064 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1065 PetscErrorCode ierr; 1066 1067 PetscFunctionBegin; 1068 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1069 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1070 a->vecinuse = 0; 1071 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1072 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1073 *v = NULL; 1074 PetscFunctionReturn(0); 1075 } 1076 1077 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1078 { 1079 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1080 PetscInt lda; 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1085 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1086 if 
(!a->cvec) { 1087 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1088 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1089 } 1090 a->vecinuse = col + 1; 1091 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1092 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1093 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1094 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1095 *v = a->cvec; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1100 { 1101 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1106 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1107 a->vecinuse = 0; 1108 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1109 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1110 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1111 *v = NULL; 1112 PetscFunctionReturn(0); 1113 } 1114 1115 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1116 { 1117 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1118 PetscErrorCode ierr; 1119 PetscInt lda; 1120 1121 PetscFunctionBegin; 1122 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1123 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1124 if (!a->cvec) { 1125 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1126 ierr = 
PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1127 } 1128 a->vecinuse = col + 1; 1129 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1130 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1131 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1132 *v = a->cvec; 1133 PetscFunctionReturn(0); 1134 } 1135 1136 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1137 { 1138 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1139 PetscErrorCode ierr; 1140 1141 PetscFunctionBegin; 1142 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1143 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1144 a->vecinuse = 0; 1145 ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1146 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1147 *v = NULL; 1148 PetscFunctionReturn(0); 1149 } 1150 1151 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1152 { 1153 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1158 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1159 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1164 { 1165 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1170 if (l->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1171 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1176 { 1177 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1178 PetscErrorCode ierr; 1179 1180 PetscFunctionBegin; 1181 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1182 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1183 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1188 { 1189 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1198 { 1199 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1208 { 1209 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1218 { 1219 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1220 PetscErrorCode ierr; 1221 1222 PetscFunctionBegin; 1223 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 
/* Thin forwarders to the inner matrix's read-write device-array accessors */
static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/* Toggle CPU/GPU execution: forwards the bind to the inner matrix (and the
   cached submatrix), destroys any cached column vector / submatrix whose type
   does not match the new execution side, and swaps the composed column-vector
   accessors between the host and CUDA implementations. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached host-type objects before switching to the CUDA accessors */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocate a MATMPIDENSECUDA matrix, optionally using the caller's device
   buffer d_data for the inner sequential matrix. No-op for other types. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* inner matrix holds the full local row block: n local rows x N global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fill with random values, delegated to the inner matrix */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Dense storage always has explicit diagonal entries; *d is left untouched */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* Operation dispatch table for MATMPIDENSE; the numeric comments mark the
   slot index in struct _MatOps. NULL means the operation is unsupported. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        NULL,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnNorms_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};

/* Create (if needed) and preallocate the sequential inner matrix holding the
   local row block; data, when non-NULL, supplies the storage. The inner type
   follows the outer type (SEQDENSECUDA for MPIDENSECUDA, else SEQDENSE). */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert an MPI dense matrix to Elemental format by inserting each process's
   local row block into a fresh (or reused) MATELEMENTAL matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Give direct access to one column of the local array, via the inner matrix */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Assemble an MPI dense matrix on comm whose local row block is inmat.
   MAT_INITIAL_MATRIX creates the parallel matrix (validating that the local
   column counts sum to N); the numeric phase copies inmat into the inner
   matrix and assembles. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;  /* NOTE(review): nloc is fetched but never used afterwards */

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Convert MATMPIDENSECUDA -> MATMPIDENSE: rebind to CPU, restore the host
   default vector type, unregister the CUDA-only composed functions, and
   convert the inner matrix back to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* composing NULL removes the CUDA-specific registrations */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert MATMPIDENSE -> MATMPIDENSECUDA: the inverse of the routine above;
   registers the CUDA accessors and converts the inner matrix to SEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Host variant: wrap column col of the local array in the cached MPI vector */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily created on first use */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* shifted by one so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only host variant */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse +
(size_t)col * (size_t)lda);CHKERRQ(ierr); 1750 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1751 *v = a->cvec; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1756 { 1757 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1762 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1763 a->vecinuse = 0; 1764 ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1765 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1766 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1767 *v = NULL; 1768 PetscFunctionReturn(0); 1769 } 1770 1771 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1772 { 1773 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1774 PetscErrorCode ierr; 1775 PetscInt lda; 1776 1777 PetscFunctionBegin; 1778 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1779 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1780 if (!a->cvec) { 1781 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1782 } 1783 a->vecinuse = col + 1; 1784 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1785 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1786 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1787 *v = a->cvec; 1788 PetscFunctionReturn(0); 1789 } 1790 1791 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1792 { 1793 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1794 PetscErrorCode ierr; 1795 1796 PetscFunctionBegin; 1797 if (!a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Expose the column range [cbegin,cend) as a parallel dense matrix that shares the storage of A;
   the wrapper matrix is created lazily and reused across calls */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first call: create the wrapper with the same row layout as A and cend-cbegin columns */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* reuse the wrapper but rebuild its column layout for the new width */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;  /* +1 so that cbegin == 0 still registers as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Return the submatrix view obtained with MatDenseGetSubMatrix_MPIDense() */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Type constructor for MATMPIDENSE: allocates the implementation data, installs the function
   table, and registers all the dense-specific methods via PetscObjectComposeFunction() */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec = NULL;
  a->Mvctx = NULL;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: builds a plain MATMPIDENSE and then converts it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation registered on B, if any */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;  /* the user-placed host array is now the valid copy */
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;  /* the replacement host array is now the valid copy */
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;  /* the user-placed device array is now the valid copy */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that which it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;  /* the replacement device array is now the valid copy */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;  /* the device copy was just written; it is now the valid one */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix.
The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2255 2256 Not Collective 2257 2258 Input Parameters: 2259 . A - the matrix 2260 2261 Output Parameters 2262 . array - the GPU array in column major order 2263 2264 Notes: 2265 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2266 2267 Level: developer 2268 2269 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2270 @*/ 2271 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2272 { 2273 PetscErrorCode ierr; 2274 2275 PetscFunctionBegin; 2276 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2277 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2278 PetscFunctionReturn(0); 2279 } 2280 2281 /*@C 2282 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2283 2284 Not Collective 2285 2286 Input Parameters: 2287 + A - the matrix 2288 - array - the GPU array in column major order 2289 2290 Notes: 2291 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2292 2293 Level: developer 2294 2295 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2296 @*/ 2297 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2298 { 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 /*@C 2307 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2308 2309 Not Collective 2310 2311 Input Parameters: 2312 . A - the matrix 2313 2314 Output Parameters 2315 . array - the GPU array in column major order 2316 2317 Notes: 2318 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2319 2320 Level: developer 2321 2322 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2323 @*/ 2324 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2325 { 2326 PetscErrorCode ierr; 2327 2328 PetscFunctionBegin; 2329 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2330 ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2331 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2332 PetscFunctionReturn(0); 2333 } 2334 2335 /*@C 2336 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;  /* the device copy may have been written; treat it as the valid one */
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* agree across ranks on whether any rank supplied data before skipping assembly */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.
A - the matrix 2438 2439 Notes: 2440 2441 Level: intermediate 2442 2443 .seealso: MatCreate(), MatCreateDense() 2444 @*/ 2445 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2446 { 2447 PetscErrorCode ierr; 2448 PetscMPIInt size; 2449 2450 PetscFunctionBegin; 2451 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2452 PetscValidLogicalCollectiveBool(*A,!!data,6); 2453 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2454 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2455 if (size > 1) { 2456 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2457 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2458 if (data) { /* user provided data array, so no need to assemble */ 2459 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2460 (*A)->assembled = PETSC_TRUE; 2461 } 2462 } else { 2463 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2464 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2465 } 2466 PetscFunctionReturn(0); 2467 } 2468 #endif 2469 2470 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2471 { 2472 Mat mat; 2473 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2474 PetscErrorCode ierr; 2475 2476 PetscFunctionBegin; 2477 *newmat = NULL; 2478 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2479 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2480 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2481 a = (Mat_MPIDense*)mat->data; 2482 2483 mat->factortype = A->factortype; 2484 mat->assembled = PETSC_TRUE; 2485 mat->preallocated = PETSC_TRUE; 2486 2487 mat->insertmode = NOT_SET_VALUES; 2488 a->donotstash = oldmat->donotstash; 2489 2490 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 2491 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 2492 2493 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2494 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPIDENSE matrix from a binary or (when available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Two MPIDENSE matrices are equal iff every rank's local block is equal */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* combine the per-rank results: equal only if equal everywhere */
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy the reuse data attached to a C = A^T*B dense product */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb =
(Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy the reuse data attached to a C = A*B^T dense product */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T*B: local aseq^T*bseq followed by a reduce-scatter of row blocks */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq; reuse the previous product matrix after the first call */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ?
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2591 2592 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2593 2594 /* arrange atbarray into sendbuf */ 2595 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2596 for (proc=0, k=0; proc<size; proc++) { 2597 for (j=0; j<cN; j++) { 2598 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2599 } 2600 } 2601 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2602 2603 /* sum all atbarray to local values of C */ 2604 ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 2605 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 2606 ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 2607 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2608 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2613 { 2614 PetscErrorCode ierr; 2615 MPI_Comm comm; 2616 PetscMPIInt size; 2617 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2618 Mat_TransMatMultDense *atb; 2619 PetscBool cisdense; 2620 PetscInt i; 2621 const PetscInt *ranges; 2622 2623 PetscFunctionBegin; 2624 MatCheckProduct(C,3); 2625 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2626 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2627 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2628 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2629 } 2630 2631 /* create matrix product C */ 2632 ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2633 ierr = 
PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full cM x cN product; size_t casts keep the element count from overflowing PetscInt */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A*B^T: sizes C and allocates the communication buffers for the chosen algorithm */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv" (default), alg 1 = "cyclic" */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ?
0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized by the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] gathers all of B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A*B^T, cyclic algorithm: row blocks of B rotate around a ring while
   each rank multiplies its local A against the block currently in hand */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the local dense blocks, narrowed to BLAS ints */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: send it in place without packing */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* pack local B column-by-column into a contiguous buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /*
start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2764 sendsiz = cK * bn; 2765 recvsiz = cK * nextbn; 2766 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2767 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2768 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2769 } 2770 2771 /* local aseq * sendbuf^T */ 2772 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2773 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2774 2775 if (nextrecvisfrom != rank) { 2776 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2777 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2778 } 2779 bn = nextbn; 2780 recvisfrom = nextrecvisfrom; 2781 sendbuf = recvbuf; 2782 } 2783 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2784 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2785 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2786 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2787 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 PetscFunctionReturn(0); 2789 } 2790 2791 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2792 { 2793 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2794 Mat_MatTransMultDense *abt; 2795 PetscErrorCode ierr; 2796 MPI_Comm comm; 2797 PetscMPIInt size; 2798 PetscScalar *cv, *sendbuf, *recvbuf; 2799 const PetscScalar *av,*bv; 2800 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2801 PetscScalar _DOne=1.0,_DZero=0.0; 2802 PetscBLASInt cm, cn, ck, alda, clda; 2803 2804 PetscFunctionBegin; 2805 MatCheckProduct(C,3); 2806 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data 
empty"); 2807 abt = (Mat_MatTransMultDense*)C->product->data; 2808 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2810 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2811 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2812 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2813 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2814 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2815 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2816 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2817 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2818 /* copy transpose of B into buf[0] */ 2819 bn = B->rmap->n; 2820 sendbuf = abt->buf[0]; 2821 recvbuf = abt->buf[1]; 2822 for (k = 0, j = 0; j < bn; j++) { 2823 for (i = 0; i < cK; i++, k++) { 2824 sendbuf[k] = bv[i * blda + j]; 2825 } 2826 } 2827 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2828 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2829 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2830 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2831 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2832 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2833 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2834 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2835 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2836 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2837 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2838 PetscFunctionReturn(0); 2839 } 2840 2841 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2842 { 2843 Mat_MatTransMultDense *abt; 2844 PetscErrorCode ierr; 2845 2846 PetscFunctionBegin; 2847 MatCheckProduct(C,3); 2848 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1: /* "cyclic" */
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default: /* "allgatherv" */
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

/* Destroy the reuse data (Elemental mirrors) attached to a C = A*B dense product */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B via Elemental: refresh the Elemental mirrors, multiply, convert back */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A*B via Elemental: build Elemental mirrors of A and B and a symbolic Ce */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A
and B */ 2901 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2902 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2903 } 2904 2905 /* create elemental matrices Ae and Be */ 2906 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2907 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2908 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2909 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2910 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2911 2912 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2913 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2914 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2915 ierr = MatSetUp(Be);CHKERRQ(ierr); 2916 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2917 2918 /* compute symbolic Ce = Ae*Be */ 2919 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2920 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2921 2922 /* setup C */ 2923 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2924 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2925 ierr = MatSetUp(C);CHKERRQ(ierr); 2926 2927 /* create data structure for reuse Cdense */ 2928 ierr = PetscNew(&ab);CHKERRQ(ierr); 2929 ab->Ae = Ae; 2930 ab->Be = Be; 2931 ab->Ce = Ce; 2932 2933 C->product->data = ab; 2934 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2935 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2936 PetscFunctionReturn(0); 2937 } 2938 #endif 2939 /* ----------------------------------------------- */ 2940 #if defined(PETSC_HAVE_ELEMENTAL) 2941 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2942 { 2943 PetscFunctionBegin; 2944 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Hook up the symbolic routines for the C = A^T*B product */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  /* A and B must share an identical row layout */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

/* Choose the algorithm ("allgatherv" or "cyclic") for C = A*B^T and hook up its symbolic routine */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on which API the user called */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Dispatch MatProductSetFromOptions for MPIDENSE by product type (AB requires Elemental) */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}