1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else *B = A; 36 PetscFunctionReturn(0); 37 } 38 39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40 { 41 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42 PetscErrorCode ierr; 43 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44 45 PetscFunctionBegin; 46 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47 lrow = row - rstart; 48 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscErrorCode ierr; 56 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57 58 PetscFunctionBegin; 59 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60 lrow = row - rstart; 61 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 
/* Return the on-process diagonal block as a sequential matrix that aliases the
   storage of the inner local matrix; the block is cached on A as "DiagonalBlock". */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* array+m*rstart points at the first entry of the diagonal block; column-major
       storage of the local matrix is shared with B, no values are copied.
       NOTE(review): this assumes the inner matrix has lda == m — confirm */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* composing transfers a reference to A, so we can drop ours right after */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Insert/add values: locally owned rows go straight into the inner sequential matrix,
   off-process rows are stashed for communication at assembly time. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input occupies v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: entry (i,j) of the input is v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: park it in the stash; matrix needs reassembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
roworiented = A->roworiented; 105 106 PetscFunctionBegin; 107 for (i=0; i<m; i++) { 108 if (idxm[i] < 0) continue; 109 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 110 if (idxm[i] >= rstart && idxm[i] < rend) { 111 row = idxm[i] - rstart; 112 if (roworiented) { 113 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 114 } else { 115 for (j=0; j<n; j++) { 116 if (idxn[j] < 0) continue; 117 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 118 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 119 } 120 } 121 } else if (!A->donotstash) { 122 mat->assembled = PETSC_FALSE; 123 if (roworiented) { 124 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 125 } else { 126 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 127 } 128 } 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 134 { 135 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 136 PetscErrorCode ierr; 137 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 138 139 PetscFunctionBegin; 140 for (i=0; i<m; i++) { 141 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 142 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 for (j=0; j<n; j++) { 146 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 147 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 148 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 149 } 150 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values 
currently supported"); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 156 { 157 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 158 PetscErrorCode ierr; 159 160 PetscFunctionBegin; 161 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 166 { 167 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 168 PetscBool iscuda; 169 PetscErrorCode ierr; 170 171 PetscFunctionBegin; 172 if (!a->A) { 173 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 174 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 175 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 176 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 177 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 178 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 179 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 180 ierr = MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 181 } 182 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 189 PetscErrorCode ierr; 190 191 PetscFunctionBegin; 192 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 193 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 194 PetscFunctionReturn(0); 195 } 196 197 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 198 { 199 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 200 PetscErrorCode ierr; 201 202 PetscFunctionBegin; 203 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 204 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 209 { 210 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 215 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 220 { 221 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 222 PetscErrorCode ierr; 223 224 PetscFunctionBegin; 225 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 226 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 227 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 232 { 233 
/* Permanently replace the local array with user storage (ownership transfers) */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract the submatrix A[isrow,iscol]. The column IS is gathered onto every
   process (columns are stored redundantly in dense format); the row IS must match
   the existing row partitioning — no redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    /* av points at the selected source column; rows are gathered by global index */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
PetscFunctionReturn(0); 323 } 324 325 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 326 { 327 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 328 PetscErrorCode ierr; 329 330 PetscFunctionBegin; 331 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 336 { 337 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 338 PetscErrorCode ierr; 339 340 PetscFunctionBegin; 341 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 346 { 347 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 348 PetscErrorCode ierr; 349 350 PetscFunctionBegin; 351 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 356 { 357 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 358 PetscErrorCode ierr; 359 PetscInt nstash,reallocs; 360 361 PetscFunctionBegin; 362 if (mdn->donotstash || mat->nooffprocentries) return(0); 363 364 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 365 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 366 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 371 { 372 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 373 PetscErrorCode ierr; 374 PetscInt i,*row,*col,flg,j,rstart,ncols; 375 PetscMPIInt n; 376 PetscScalar *val; 377 378 PetscFunctionBegin; 379 if (!mdn->donotstash && !mat->nooffprocentries) { 380 /* wait on receives */ 381 while (1) { 382 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 383 if (!flg) break; 384 385 for (i=0; i<n;) { 386 /* Now 
/* Zero every entry of the local block */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Zero the listed (global) rows, optionally placing diag on the diagonal and
   fixing the right-hand side b to diag*x for those rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows with a zero diagonal first, then insert diag afterwards,
     because MatDiagonalSet handles the parallel layout */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

/* y = A x: broadcast the distributed x into the local work vector lvec via the
   PetscSF, then apply the local (m x N) block. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A x: same gather as MatMult, then the local multadd */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A^T x: apply the local transpose into lvec, then sum the partial results
   back into the distributed y with an SF reduction. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr); /* reduction adds into yy, so start from zero */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscScalar *ax; 498 PetscScalar *ay; 499 500 PetscFunctionBegin; 501 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 502 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 503 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 504 ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 505 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 506 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 507 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 508 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 513 { 514 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 515 PetscErrorCode ierr; 516 const PetscScalar *ax; 517 PetscScalar *ay; 518 519 PetscFunctionBegin; 520 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 521 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 522 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 523 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 524 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 525 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 526 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 527 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 531 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 532 { 533 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 534 PetscErrorCode ierr; 535 PetscInt lda,len,i,n,m = A->rmap->n,radd; 536 PetscScalar *x,zero = 0.0; 537 const PetscScalar *av; 538 539 PetscFunctionBegin; 540 ierr = VecSet(v,zero);CHKERRQ(ierr); 541 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 542 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 543 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 544 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 545 radd = A->rmap->rstart*m; 546 ierr = 
/* Destroy the parallel dense matrix: inner objects, stash, and every composed
   method registered on the object. Outstanding column-vector or submatrix
   accesses must have been restored first. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove every composed method so a later MatSetType() starts clean */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
/* View helper for ASCII, draw, and socket viewers. Info formats are printed
   per-process; otherwise the whole matrix is gathered to rank 0 and viewed there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns the whole copy; view its sequential inner matrix */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* Dispatch viewing to the helper for ASCII/draw/socket viewers or to the
   binary path; other viewer types are silently ignored. */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Gather matrix statistics: local values come from the inner matrix; global
   variants reduce them across the communicator with MAX or SUM. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 749 750 info->nz_used = irecv[0]; 751 info->nz_allocated = irecv[1]; 752 info->nz_unneeded = irecv[2]; 753 info->memory = irecv[3]; 754 info->mallocs = irecv[4]; 755 } 756 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 757 info->fill_ratio_needed = 0; 758 info->factor_mallocs = 0; 759 PetscFunctionReturn(0); 760 } 761 762 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 763 { 764 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 switch (op) { 769 case MAT_NEW_NONZERO_LOCATIONS: 770 case MAT_NEW_NONZERO_LOCATION_ERR: 771 case MAT_NEW_NONZERO_ALLOCATION_ERR: 772 MatCheckPreallocated(A,1); 773 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 774 break; 775 case MAT_ROW_ORIENTED: 776 MatCheckPreallocated(A,1); 777 a->roworiented = flg; 778 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 779 break; 780 case MAT_NEW_DIAGONALS: 781 case MAT_KEEP_NONZERO_PATTERN: 782 case MAT_USE_HASH_TABLE: 783 case MAT_SORTED_FULL: 784 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 785 break; 786 case MAT_IGNORE_OFF_PROC_ENTRIES: 787 a->donotstash = flg; 788 break; 789 case MAT_SYMMETRIC: 790 case MAT_STRUCTURALLY_SYMMETRIC: 791 case MAT_HERMITIAN: 792 case MAT_SYMMETRY_ETERNAL: 793 case MAT_IGNORE_LOWER_TRIANGULAR: 794 case MAT_IGNORE_ZERO_ENTRIES: 795 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 796 break; 797 default: 798 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 799 } 800 PetscFunctionReturn(0); 801 } 802 803 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 804 { 805 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 806 const PetscScalar *l; 807 PetscScalar x,*v,*vv,*r; 808 PetscErrorCode ierr; 809 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 810 811 PetscFunctionBegin; 812 ierr = 
MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    /* scale row i of the local block by l[i]; columns are lda apart in memory */
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    /* gather the distributed right-scaling values into mdn->lvec via the SF,
       so every rank has the scaling for every column of its local block */
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    /* scale column i of the local block by r[i]; a column is contiguous */
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIDense - Computes the Frobenius, 1- or infinity-norm of a parallel
   dense matrix; the 2-norm is not supported.  On one rank this simply defers
   to the sequential implementation.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the local block, then reduce and take the root
         NOTE(review): this walks cmap->n*rmap->n entries assuming the local
         block is stored contiguously (lda == m) -- confirm for placed arrays */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* 1-norm: maximum column sum; accumulate local column sums, reduce, max */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIDense - Forms the transpose by inserting each local column
   of A as a row of the result with MatSetValues() and reassembling.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); /* row/column layouts swapped */
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr =
MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  /* rwork holds the global row indices owned by this rank */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  /* column j of A becomes row j of B */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's innards with B's */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* MatSetUp_MPIDense - Sets up layouts and default-preallocates if needed. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* MatAXPY_MPIDense - Y <- alpha*X + Y, performed on the local blocks. */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatConjugate_MPIDense - Complex-conjugates all entries (local operation). */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatRealPart_MPIDense - Replaces each entry by its real part (local operation). */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatImaginaryPart_MPIDense - Replaces each entry by its imaginary part. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetColumnVector_MPIDense - Extracts column col into v by dispatching to
   the local block's getcolumnvector implementation.
*/
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

/*
   MatGetColumnNorms_MPIDense - Computes the requested norm of every column.
   Local column norms are combined across ranks: MAX for the infinity norm,
   SUM otherwise (2-norms are squared before the sum and rooted after).
*/
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* square so the cross-rank SUM reduction is meaningful */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
static PetscErrorCode
MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one column vector / submatrix may be checked out at a time */
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its device array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* stored 1-based so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column checked out by MatDenseGetColumnVec_MPIDenseCUDA(). */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: additionally read-locks the wrapper vector. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); /* forbid writes through the wrapper */
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: uses the write accessor of the local block. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* The following wrappers forward the MatDenseCUDA array operations to the
   local sequential block; the place/reset/replace variants first verify that
   no column vector or submatrix is checked out. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;
PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* host (MATMPIDENSE) column-vector implementations, defined later in this file */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/*
   MatBindToCPU_MPIDenseCUDA - Switches the matrix between host and device
   execution: binds the local block (and cached submatrix), swaps the composed
   column-vector implementations, and destroys cached helper objects whose
   type no longer matches.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers of the wrong (host) type so they are recreated on device */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatMPIDenseCUDASetPreallocation - Preallocates the device storage of a
   MATMPIDENSECUDA matrix; d_data, if non-NULL, is used as the (device) array
   of the local block.  No-op for other matrix types.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    /* local block stores all columns: n-local rows by N-global columns */
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* MatSetRandom_MPIDense - Fills the local block with random values. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Dense storage always contains the diagonal; d is deliberately left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode
MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Operation table for MATMPIDENSE; the slot comments give the MatOps index */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

/*
   MatMPIDenseSetPreallocation_MPIDense - Allocates the local (sequential)
   block of a parallel dense matrix; data, if non-NULL, is used as the array
   of the local block.  The block type follows the parallel type (CUDA-aware
   when the matrix is MATMPIDENSECUDA).
*/
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    /* local block stores all columns: n-local rows by N-global columns */
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ?
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a parallel dense matrix to the
   MATELEMENTAL format by inserting all locally owned rows with MatSetValues().
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the dense array is column oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* rows[] are this rank's global row indices; cols[] spans all columns */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Forwards MatDenseGetColumn() to the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Forwards MatDenseRestoreColumn() to the local block. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCreateMPIMatConcatenateSeqMat_MPIDense - Builds (or refills) a parallel
   dense matrix whose local block is a copy of the given sequential matrix.
   With MAT_INITIAL_MATRIX the column ownership n is validated (or decided)
   and the output matrix created; the numeric phase copies inmat into the
   local block and reassembles.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - Converts a MATMPIDENSECUDA matrix back
   to plain MATMPIDENSE: rebinds to the CPU, restores the host vector type,
   and removes the CUDA-specific composed methods.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr =
MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-only composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/*
   MatConvert_MPIDense_MPIDenseCUDA - Converts a MATMPIDENSE matrix to
   MATMPIDENSECUDA: installs the CUDA vector type, composes the CUDA array
   and product methods, converts the local block, and unbinds from the CPU.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/*
   MatDenseGetColumnVec_MPIDense - Returns column col of the matrix wrapped in
   a Vec that aliases the matrix storage; must be matched by a restore call.
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* only one column vector / submatrix may be checked out at a time */
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* stored 1-based so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column checked out by MatDenseGetColumnVec_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse +
(size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the read-only column vector obtained with MatDenseGetColumnVec{Read}_MPIDense():
   restores the borrowed array of the local dense block, pops the read lock, and detaches
   the wrapper vector so it can be reused for another column. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0; /* mark the shared wrapper vector as free again */
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); /* undo the VecLockReadPush() done at Get time, before resetting the array */
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Exposes column col of the local dense block as a distributed Vec with write-only access.
   The Vec borrows the matrix storage (no copy); it must be returned with
   MatDenseRestoreColumnVecWrite_MPIDense() before any other column can be requested. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* wrapper vector is created lazily; its array is placed per call */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* stored +1 so that column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts lda entries apart; size_t arithmetic avoids PetscInt overflow in the offset */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1790 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1791 a->vecinuse = 0; 1792 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1793 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1794 *v = NULL; 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1799 { 1800 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1801 Mat_MPIDense *c; 1802 PetscErrorCode ierr; 1803 MPI_Comm comm; 1804 PetscBool setup = PETSC_FALSE; 1805 1806 PetscFunctionBegin; 1807 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1808 if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1809 if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1810 if (!a->cmat) { 1811 setup = PETSC_TRUE; 1812 ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr); 1813 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr); 1814 ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1815 ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr); 1816 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1817 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1818 } else if (cend-cbegin != a->cmat->cmap->N) { 1819 setup = PETSC_TRUE; 1820 ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr); 1821 ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr); 1822 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1823 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1824 } 1825 c = (Mat_MPIDense*)a->cmat->data; 1826 if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1827 ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr); 1828 if 
(setup) { /* do we really need this? */ 1829 ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr); 1830 } 1831 a->cmat->preallocated = PETSC_TRUE; 1832 a->cmat->assembled = PETSC_TRUE; 1833 a->matinuse = cbegin + 1; 1834 *v = a->cmat; 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1839 { 1840 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1841 Mat_MPIDense *c; 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1846 if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1847 if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1848 a->matinuse = 0; 1849 c = (Mat_MPIDense*)a->cmat->data; 1850 ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr); 1851 *v = NULL; 1852 PetscFunctionReturn(0); 1853 } 1854 1855 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1856 { 1857 Mat_MPIDense *a; 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1862 mat->data = (void*)a; 1863 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1864 1865 mat->insertmode = NOT_SET_VALUES; 1866 1867 /* build cache for off array entries formed */ 1868 a->donotstash = PETSC_FALSE; 1869 1870 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1871 1872 /* stuff used for matrix vector multiply */ 1873 a->lvec = 0; 1874 a->Mvctx = 0; 1875 a->roworiented = PETSC_TRUE; 1876 1877 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1878 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr); 1879 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1880 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1881 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1882 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1883 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 1884 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1885 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1886 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1887 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 1888 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1889 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1890 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1891 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1892 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1893 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1894 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr); 1895 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr); 1896 #if defined(PETSC_HAVE_ELEMENTAL) 1897 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1898 #endif 1899 #if defined(PETSC_HAVE_SCALAPACK) 1900 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr); 1901 #endif 1902 #if defined(PETSC_HAVE_CUDA) 1903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1904 #endif 1905 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1906 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1908 #if defined(PETSC_HAVE_CUDA) 1909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1911 #endif 1912 1913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1915 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1916 
PetscFunctionReturn(0); 1917 } 1918 1919 /*MC 1920 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1921 1922 Options Database Keys: 1923 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1924 1925 Level: beginner 1926 1927 .seealso: 1928 1929 M*/ 1930 #if defined(PETSC_HAVE_CUDA) 1931 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1932 { 1933 PetscErrorCode ierr; 1934 1935 PetscFunctionBegin; 1936 ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1937 ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 #endif 1941 1942 /*MC 1943 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1944 1945 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1946 and MATMPIDENSE otherwise. 1947 1948 Options Database Keys: 1949 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1950 1951 Level: beginner 1952 1953 1954 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 1955 M*/ 1956 1957 /*MC 1958 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1959 1960 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1961 and MATMPIDENSECUDA otherwise. 1962 1963 Options Database Keys: 1964 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1965 1966 Level: beginner 1967 1968 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1969 M*/ 1970 1971 /*@C 1972 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1973 1974 Not collective 1975 1976 Input Parameters: 1977 . B - the matrix 1978 - data - optional location of matrix data. Set data=NULL for PETSc 1979 to control all matrix memory allocation. 
1980 1981 Notes: 1982 The dense format is fully compatible with standard Fortran 77 1983 storage by columns. 1984 1985 The data input variable is intended primarily for Fortran programmers 1986 who wish to allocate their own matrix memory space. Most users should 1987 set data=NULL. 1988 1989 Level: intermediate 1990 1991 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1992 @*/ 1993 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1994 { 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1999 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 /*@ 2004 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 2005 array provided by the user. This is useful to avoid copying an array 2006 into a matrix 2007 2008 Not Collective 2009 2010 Input Parameters: 2011 + mat - the matrix 2012 - array - the array in column major order 2013 2014 Notes: 2015 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2016 freed when the matrix is destroyed. 
   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* user array is host memory, so any GPU copy is now stale */
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the one it had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* replacement array is host memory */
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* device data is now the authoritative copy */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to the one it had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.
mat - the matrix 2136 2137 Notes: 2138 You can only call this after a call to MatDenseCUDAPlaceArray() 2139 2140 Level: developer 2141 2142 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2143 2144 @*/ 2145 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2146 { 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2151 ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2152 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 /*@C 2157 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2158 array provided by the user. This is useful to avoid copying an array 2159 into a matrix 2160 2161 Not Collective 2162 2163 Input Parameters: 2164 + mat - the matrix 2165 - array - the array in column major order 2166 2167 Notes: 2168 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2169 The memory passed in CANNOT be freed by the user. It will be freed 2170 when the matrix is destroyed. The array should respect the matrix leading dimension. 2171 2172 Level: developer 2173 2174 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2175 @*/ 2176 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2177 { 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2182 ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2183 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2184 mat->offloadmask = PETSC_OFFLOAD_GPU; 2185 PetscFunctionReturn(0); 2186 } 2187 2188 /*@C 2189 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2190 2191 Not Collective 2192 2193 Input Parameters: 2194 . A - the matrix 2195 2196 Output Parameters 2197 . 
array - the GPU array in column major order 2198 2199 Notes: 2200 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2201 2202 Level: developer 2203 2204 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2205 @*/ 2206 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2207 { 2208 PetscErrorCode ierr; 2209 2210 PetscFunctionBegin; 2211 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2212 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2213 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 /*@C 2218 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2219 2220 Not Collective 2221 2222 Input Parameters: 2223 + A - the matrix 2224 - array - the GPU array in column major order 2225 2226 Notes: 2227 2228 Level: developer 2229 2230 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2231 @*/ 2232 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2233 { 2234 PetscErrorCode ierr; 2235 2236 PetscFunctionBegin; 2237 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2238 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2239 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2240 A->offloadmask = PETSC_OFFLOAD_GPU; 2241 PetscFunctionReturn(0); 2242 } 2243 2244 /*@C 2245 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2246 2247 Not Collective 2248 2249 Input Parameters: 2250 . A - the matrix 2251 2252 Output Parameters 2253 . array - the GPU array in column major order 2254 2255 Notes: 2256 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2257 2258 Level: developer 2259 2260 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2261 @*/ 2262 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2263 { 2264 PetscErrorCode ierr; 2265 2266 PetscFunctionBegin; 2267 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2268 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 /*@C 2273 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2274 2275 Not Collective 2276 2277 Input Parameters: 2278 + A - the matrix 2279 - array - the GPU array in column major order 2280 2281 Notes: 2282 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2283 2284 Level: developer 2285 2286 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2287 @*/ 2288 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2289 { 2290 PetscErrorCode ierr; 2291 2292 PetscFunctionBegin; 2293 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2294 PetscFunctionReturn(0); 2295 } 2296 2297 /*@C 2298 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2299 2300 Not Collective 2301 2302 Input Parameters: 2303 . A - the matrix 2304 2305 Output Parameters 2306 . array - the GPU array in column major order 2307 2308 Notes: 2309 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2310 2311 Level: developer 2312 2313 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2314 @*/ 2315 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2316 { 2317 PetscErrorCode ierr; 2318 2319 PetscFunctionBegin; 2320 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2321 ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2322 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 2326 /*@C 2327 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 
2328 2329 Not Collective 2330 2331 Input Parameters: 2332 + A - the matrix 2333 - array - the GPU array in column major order 2334 2335 Notes: 2336 2337 Level: developer 2338 2339 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2340 @*/ 2341 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2342 { 2343 PetscErrorCode ierr; 2344 2345 PetscFunctionBegin; 2346 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2347 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2348 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2349 A->offloadmask = PETSC_OFFLOAD_GPU; 2350 PetscFunctionReturn(0); 2351 } 2352 #endif 2353 2354 /*@C 2355 MatCreateDense - Creates a matrix in dense format. 2356 2357 Collective 2358 2359 Input Parameters: 2360 + comm - MPI communicator 2361 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2362 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2363 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2364 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2365 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2366 to control all matrix memory allocation. 2367 2368 Output Parameter: 2369 . A - the matrix 2370 2371 Notes: 2372 The dense format is fully compatible with standard Fortran 77 2373 storage by columns. 2374 2375 The data input variable is intended primarily for Fortran programmers 2376 who wish to allocate their own matrix memory space. Most users should 2377 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2378 2379 The user MUST specify either the local or global matrix dimensions 2380 (possibly both). 
2381 2382 Level: intermediate 2383 2384 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2385 @*/ 2386 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2387 { 2388 PetscErrorCode ierr; 2389 PetscMPIInt size; 2390 2391 PetscFunctionBegin; 2392 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2393 PetscValidLogicalCollectiveBool(*A,!!data,6); 2394 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2395 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2396 if (size > 1) { 2397 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2398 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2399 if (data) { /* user provided data array, so no need to assemble */ 2400 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2401 (*A)->assembled = PETSC_TRUE; 2402 } 2403 } else { 2404 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2405 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2406 } 2407 PetscFunctionReturn(0); 2408 } 2409 2410 #if defined(PETSC_HAVE_CUDA) 2411 /*@C 2412 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2413 2414 Collective 2415 2416 Input Parameters: 2417 + comm - MPI communicator 2418 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2419 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2420 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2421 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2422 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2423 to control matrix memory allocation. 2424 2425 Output Parameter: 2426 . 
A - the matrix 2427 2428 Notes: 2429 2430 Level: intermediate 2431 2432 .seealso: MatCreate(), MatCreateDense() 2433 @*/ 2434 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2435 { 2436 PetscErrorCode ierr; 2437 PetscMPIInt size; 2438 2439 PetscFunctionBegin; 2440 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2441 PetscValidLogicalCollectiveBool(*A,!!data,6); 2442 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2443 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2444 if (size > 1) { 2445 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2446 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2447 if (data) { /* user provided data array, so no need to assemble */ 2448 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2449 (*A)->assembled = PETSC_TRUE; 2450 } 2451 } else { 2452 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2453 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2454 } 2455 PetscFunctionReturn(0); 2456 } 2457 #endif 2458 2459 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2460 { 2461 Mat mat; 2462 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2463 PetscErrorCode ierr; 2464 2465 PetscFunctionBegin; 2466 *newmat = 0; 2467 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2468 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2469 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2470 a = (Mat_MPIDense*)mat->data; 2471 2472 mat->factortype = A->factortype; 2473 mat->assembled = PETSC_TRUE; 2474 mat->preallocated = PETSC_TRUE; 2475 2476 mat->insertmode = NOT_SET_VALUES; 2477 a->donotstash = oldmat->donotstash; 2478 2479 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 2480 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 2481 2482 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2483 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a dense matrix from a viewer; dispatches on the viewer type (binary, or HDF5 when available). */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Compare the local sequential blocks on each rank, then AND the results across the communicator. */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free the data attached to a C = A^T*B product matrix (send buffer, receive counts, cached atb). */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free the data attached to a C = A*B^T product matrix (ping-pong buffers, allgatherv counts/displacements). */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T*B: each rank forms its local aseq^T*bseq contribution,
   then the contributions are summed and scattered to the owning ranks of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? 
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, pack its owned
     rows of every column contiguously so Reduce_scatter can split by rank */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T*B: sets the type and sizes of C and allocates the
   communication buffers reused by every numeric application. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to own the same row range on each rank */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full cM x cN product; size_t casts avoid PetscInt overflow in the product */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A*B^T: sets up C and allocates the buffers needed by the
   chosen algorithm ("allgatherv" or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 
0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers, each big enough for the largest block of B rows owned by any rank */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local transposed block of B, buf[1] the globally gathered B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A*B^T, "cyclic" algorithm: the local row blocks of B are
   rotated around the ring of processes; at each step the local A block is
   multiplied against the block currently held while the next one is in flight. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B block is contiguous: send it in place without copying */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* pack the local B block contiguously into the first ping-pong buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A*B^T, "allgatherv" algorithm: gather B^T on every
   process and perform a single local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av,*bv;
  PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = 
(Mat_MatTransMultDense*)C->product->data; 2797 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2798 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2799 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2800 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2801 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2802 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2803 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2804 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2805 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2806 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2807 /* copy transpose of B into buf[0] */ 2808 bn = B->rmap->n; 2809 sendbuf = abt->buf[0]; 2810 recvbuf = abt->buf[1]; 2811 for (k = 0, j = 0; j < bn; j++) { 2812 for (i = 0; i < cK; i++, k++) { 2813 sendbuf[k] = bv[i * blda + j]; 2814 } 2815 } 2816 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2817 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2818 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2819 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2820 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2821 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2822 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2823 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2824 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2825 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2826 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2831 { 2832 Mat_MatTransMultDense *abt; 2833 PetscErrorCode ierr; 2834 2835 PetscFunctionBegin; 2836 MatCheckProduct(C,3); 2837 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2838 abt = (Mat_MatTransMultDense*)C->product->data; 2839 switch (abt->alg) { 2840 case 1: 2841 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2842 break; 2843 default: 2844 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2845 break; 2846 } 2847 PetscFunctionReturn(0); 2848 } 2849 2850 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2851 { 2852 PetscErrorCode ierr; 2853 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2854 2855 PetscFunctionBegin; 2856 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2857 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2858 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2859 ierr = PetscFree(ab);CHKERRQ(ierr); 2860 PetscFunctionReturn(0); 2861 } 2862 2863 #if defined(PETSC_HAVE_ELEMENTAL) 2864 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2865 { 2866 PetscErrorCode ierr; 2867 Mat_MatMultDense *ab; 2868 2869 PetscFunctionBegin; 2870 MatCheckProduct(C,3); 2871 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2872 ab = (Mat_MatMultDense*)C->product->data; 2873 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2874 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2875 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2876 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2877 PetscFunctionReturn(0); 2878 } 2879 2880 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2881 { 2882 PetscErrorCode ierr; 2883 Mat Ae,Be,Ce; 2884 Mat_MatMultDense *ab; 2885 2886 PetscFunctionBegin; 2887 MatCheckProduct(C,4); 2888 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2889 /* check local size of A 
and B */ 2890 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2891 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2892 } 2893 2894 /* create elemental matrices Ae and Be */ 2895 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2896 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2897 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2898 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2899 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2900 2901 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2902 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2903 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2904 ierr = MatSetUp(Be);CHKERRQ(ierr); 2905 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2906 2907 /* compute symbolic Ce = Ae*Be */ 2908 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2909 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2910 2911 /* setup C */ 2912 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2913 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2914 ierr = MatSetUp(C);CHKERRQ(ierr); 2915 2916 /* create data structure for reuse Cdense */ 2917 ierr = PetscNew(&ab);CHKERRQ(ierr); 2918 ab->Ae = Ae; 2919 ab->Be = Be; 2920 ab->Ce = Ce; 2921 2922 C->product->data = ab; 2923 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2924 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2925 PetscFunctionReturn(0); 2926 } 2927 #endif 2928 /* ----------------------------------------------- */ 2929 #if defined(PETSC_HAVE_ELEMENTAL) 2930 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2931 { 2932 PetscFunctionBegin; 2933 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2934 C->ops->productsymbolic = MatProductSymbolic_AB; 2935 PetscFunctionReturn(0); 2936 } 2937 #endif 2938 2939 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2940 { 2941 Mat_Product *product = C->product; 2942 Mat A = product->A,B=product->B; 2943 2944 PetscFunctionBegin; 2945 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2946 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2947 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2948 C->ops->productsymbolic = MatProductSymbolic_AtB; 2949 PetscFunctionReturn(0); 2950 } 2951 2952 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2953 { 2954 PetscErrorCode ierr; 2955 Mat_Product *product = C->product; 2956 const char *algTypes[2] = {"allgatherv","cyclic"}; 2957 PetscInt alg,nalg = 2; 2958 PetscBool flg = PETSC_FALSE; 2959 2960 PetscFunctionBegin; 2961 /* Set default algorithm */ 2962 alg = 0; /* default is allgatherv */ 2963 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2964 if (flg) { 2965 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2966 } 2967 2968 /* Get runtime option */ 2969 if (product->api_user) { 2970 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2971 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2972 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2973 } else { 2974 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2975 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic 
approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2976 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2977 } 2978 if (flg) { 2979 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2980 } 2981 2982 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2983 C->ops->productsymbolic = MatProductSymbolic_ABt; 2984 PetscFunctionReturn(0); 2985 } 2986 2987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2988 { 2989 PetscErrorCode ierr; 2990 Mat_Product *product = C->product; 2991 2992 PetscFunctionBegin; 2993 switch (product->type) { 2994 #if defined(PETSC_HAVE_ELEMENTAL) 2995 case MATPRODUCT_AB: 2996 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2997 break; 2998 #endif 2999 case MATPRODUCT_AtB: 3000 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 3001 break; 3002 case MATPRODUCT_ABt: 3003 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 3004 break; 3005 default: 3006 break; 3007 } 3008 PetscFunctionReturn(0); 3009 } 3010