/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  /* MPI dense: hand back the per-process sequential block; Seq dense: the matrix is its own local matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else {
    ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

/* Access one locally owned row: translate the global row index to a local one and defer to the sequential block */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release the row data obtained with MatGetRow_MPIDense(); must be passed the same global row index */
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return (and cache on the matrix under "DiagonalBlock") the square diagonal block owned by this process */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases the diagonal part of the local array.
       NOTE(review): the offset m*rstart assumes the local leading dimension equals m
       (i.e. no MatDenseSetLDA in effect) -- confirm before relying on this with a custom lda */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr); /* the PetscObjectCompose() above holds a reference, so B stays alive */
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Insert/add values: locally owned rows go straight into the sequential block;
   off-process rows are stashed until MatAssemblyBegin/End communicates them */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;   /* negative row indices are ignored by convention */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          /* column-oriented input: value (i,j) lives at v[i+j*m] */
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr =
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Fetch entries; only locally owned rows may be queried (no communication is performed) */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* The leading dimension of the local array is that of the sequential block */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Set the leading dimension; if the sequential block does not exist yet it is created lazily here
   (the same creation path used by MatMPIDenseSetPreallocation) */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    /* local block is (local rows) x (ALL global columns) */
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-write access to the local array; not allowed while a submatrix is checked out */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-only access to the local array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Write-only access to the local array (previous contents need not be preserved) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Temporarily replace the local array with a user-provided one (undone by MatDenseResetArray) */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse)
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Undo MatDensePlaceArray: restore the array that was in use before */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Permanently replace the local array with a user-provided one */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract A(isrow,iscol); the submatrix must keep the same row partitioning as A
   (no redistribution is performed -- see comment in the body) */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the full (global) list of requested columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is advanced assuming the new local block is packed (lda == nrows) -- holds for
     a freshly preallocated matrix; confirm for MAT_REUSE_MATRIX with a modified lda */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

/* Counterpart of MatDenseGetArray_MPIDense */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Counterpart of MatDenseGetArrayRead_MPIDense */
PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Counterpart of MatDenseGetArrayWrite_MPIDense */
PetscErrorCode
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 350 { 351 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 360 { 361 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt nstash,reallocs; 364 365 PetscFunctionBegin; 366 if (mdn->donotstash || mat->nooffprocentries) return(0); 367 368 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 369 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 370 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 371 PetscFunctionReturn(0); 372 } 373 374 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 375 { 376 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 377 PetscErrorCode ierr; 378 PetscInt i,*row,*col,flg,j,rstart,ncols; 379 PetscMPIInt n; 380 PetscScalar *val; 381 382 PetscFunctionBegin; 383 if (!mdn->donotstash && !mat->nooffprocentries) { 384 /* wait on receives */ 385 while (1) { 386 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 387 if (!flg) break; 388 389 for (i=0; i<n;) { 390 /* Now identify the consecutive vals belonging to the same row */ 391 for (j=i,rstart=row[j]; j<n; j++) { 392 if (row[j] != rstart) break; 393 } 394 if (j < n) ncols = j-i; 395 else ncols = n-i; 396 /* Now assemble all these values with a single function call */ 397 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 398 i = j; 399 } 400 } 401 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 402 } 403 404 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 405 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 406 407 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 
408 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 409 } 410 PetscFunctionReturn(0); 411 } 412 413 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 414 { 415 PetscErrorCode ierr; 416 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 417 418 PetscFunctionBegin; 419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 424 { 425 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 426 PetscErrorCode ierr; 427 PetscInt i,len,*lrows; 428 429 PetscFunctionBegin; 430 /* get locally owned rows */ 431 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 432 /* fix right hand side if needed */ 433 if (x && b) { 434 const PetscScalar *xx; 435 PetscScalar *bb; 436 437 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 438 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 439 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 440 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 441 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 442 } 443 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 444 if (diag != 0.0) { 445 Vec d; 446 447 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 448 ierr = VecSet(d,diag);CHKERRQ(ierr); 449 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 450 ierr = VecDestroy(&d);CHKERRQ(ierr); 451 } 452 ierr = PetscFree(lrows);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 460 461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 PetscErrorCode ierr; 465 const PetscScalar *ax; 466 PetscScalar *ay; 467 468 PetscFunctionBegin; 469 ierr = 
VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* broadcast the distributed x into the sequential lvec */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = A x + y: same gather as MatMult, then local multadd */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A^T x: local transpose-multiply into lvec, then sum the partial results into y
   across processes with an SF reduction */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr); /* reduction adds into yy, so it must start at zero */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = A^T x + y: copy y into z, then reduce the local transpose-multiply into z */
PetscErrorCode
MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 517 { 518 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 519 PetscErrorCode ierr; 520 const PetscScalar *ax; 521 PetscScalar *ay; 522 523 PetscFunctionBegin; 524 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 525 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 526 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 527 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 528 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 529 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 530 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 531 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 536 { 537 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 538 PetscErrorCode ierr; 539 PetscInt lda,len,i,n,m = A->rmap->n,radd; 540 PetscScalar *x,zero = 0.0; 541 const PetscScalar *av; 542 543 PetscFunctionBegin; 544 ierr = VecSet(v,zero);CHKERRQ(ierr); 545 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 546 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 547 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 548 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 549 radd = A->rmap->rstart*m; 550 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 551 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 552 for (i=0; i<len; i++) { 553 x[i] = av[radd + i*lda + i]; 554 } 555 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 556 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 PetscErrorCode MatDestroy_MPIDense(Mat mat) 561 { 562 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 563 PetscErrorCode ierr; 564 565 PetscFunctionBegin; 566 #if defined(PETSC_USE_LOG) 567 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 568 #endif 569 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* detach every method implementation composed on the object at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
/* Shared viewer for ASCII, draw, and socket viewers; except for the INFO formats it gathers
   the whole matrix onto rank 0 and views it there */
static PetscErrorCode
MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* the INFO formats print only statistics and return early (no gather needed) */
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns all rows now; view its sequential block under the original matrix name */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatch viewing to the implementation matching the viewer type */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Report storage statistics: local numbers from the sequential block; for the global
   flavors reduce (max or sum) the local numbers over the communicator */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Handle matrix options: most either forward to the sequential block or are no-ops for dense storage */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg; /* affects how MatSetValues interprets the value array */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* A = diag(ll) A diag(rr): scale rows by ll and columns by rr (either may be NULL) */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D",
s2a, s2); 822 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 823 for (i=0; i<m; i++) { 824 x = l[i]; 825 v = vv + i; 826 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 827 } 828 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 829 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 830 } 831 if (rr) { 832 const PetscScalar *ar; 833 834 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 835 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 836 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 837 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 838 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 839 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 840 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 841 for (i=0; i<n; i++) { 842 x = r[i]; 843 v = vv + i*lda; 844 for (j=0; j<m; j++) (*v++) *= x; 845 } 846 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 847 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 848 } 849 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 854 { 855 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 856 PetscErrorCode ierr; 857 PetscInt i,j; 858 PetscMPIInt size; 859 PetscReal sum = 0.0; 860 const PetscScalar *av,*v; 861 862 PetscFunctionBegin; 863 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 864 v = av; 865 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 866 if (size == 1) { 867 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 868 } else { 869 if (type == NORM_FROBENIUS) { 870 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 871 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 872 } 873 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 874 *nrm = PetscSqrtReal(*nrm); 875 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 876 } else if 
(type == NORM_1) { 877 PetscReal *tmp,*tmp2; 878 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 879 *nrm = 0.0; 880 v = av; 881 for (j=0; j<mdn->A->cmap->n; j++) { 882 for (i=0; i<mdn->A->rmap->n; i++) { 883 tmp[j] += PetscAbsScalar(*v); v++; 884 } 885 } 886 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 887 for (j=0; j<A->cmap->N; j++) { 888 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 889 } 890 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 891 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 892 } else if (type == NORM_INFINITY) { /* max row norm */ 893 PetscReal ntemp; 894 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 895 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 896 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 897 } 898 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 903 { 904 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 905 Mat B; 906 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 907 PetscErrorCode ierr; 908 PetscInt j,i,lda; 909 PetscScalar *v; 910 911 PetscFunctionBegin; 912 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 913 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 914 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 915 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 916 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 917 } else B = *matout; 918 919 m = a->A->rmap->n; n = a->A->cmap->n; 920 ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 921 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 922 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 923 for (i=0; i<m; i++) rwork[i] = rstart + i; 924 for (j=0; j<n; j++) { 925 
ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 926 v += lda; 927 } 928 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 929 ierr = PetscFree(rwork);CHKERRQ(ierr); 930 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 931 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 932 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 933 *matout = B; 934 } else { 935 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 936 } 937 PetscFunctionReturn(0); 938 } 939 940 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 941 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 942 943 PetscErrorCode MatSetUp_MPIDense(Mat A) 944 { 945 PetscErrorCode ierr; 946 947 PetscFunctionBegin; 948 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 949 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 950 if (!A->preallocated) { 951 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 952 } 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 957 { 958 PetscErrorCode ierr; 959 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 960 961 PetscFunctionBegin; 962 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatConjugate_MPIDense(Mat mat) 967 { 968 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 ierr = MatConjugate(a->A);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 PetscErrorCode MatRealPart_MPIDense(Mat A) 977 { 978 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 979 PetscErrorCode ierr; 980 981 PetscFunctionBegin; 982 ierr = MatRealPart(a->A);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 987 { 988 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 989 PetscErrorCode ierr; 990 991 PetscFunctionBegin; 992 ierr = 
MatImaginaryPart(a->A);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 997 { 998 PetscErrorCode ierr; 999 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1000 1001 PetscFunctionBegin; 1002 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 1003 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 1004 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1009 1010 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1011 { 1012 PetscErrorCode ierr; 1013 PetscInt i,n; 1014 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1015 PetscReal *work; 1016 1017 PetscFunctionBegin; 1018 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1019 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1020 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1021 if (type == NORM_2) { 1022 for (i=0; i<n; i++) work[i] *= work[i]; 1023 } 1024 if (type == NORM_INFINITY) { 1025 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1026 } else { 1027 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1028 } 1029 ierr = PetscFree(work);CHKERRQ(ierr); 1030 if (type == NORM_2) { 1031 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1032 } 1033 PetscFunctionReturn(0); 1034 } 1035 1036 #if defined(PETSC_HAVE_CUDA) 1037 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1038 { 1039 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1040 PetscErrorCode ierr; 1041 PetscInt lda; 1042 1043 PetscFunctionBegin; 1044 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1045 if (a->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1046 if (!a->cvec) { 1047 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1048 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1049 } 1050 a->vecinuse = col + 1; 1051 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1052 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1053 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1054 *v = a->cvec; 1055 PetscFunctionReturn(0); 1056 } 1057 1058 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1059 { 1060 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1061 PetscErrorCode ierr; 1062 1063 PetscFunctionBegin; 1064 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1065 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1066 a->vecinuse = 0; 1067 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1068 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1069 *v = NULL; 1070 PetscFunctionReturn(0); 1071 } 1072 1073 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1074 { 1075 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1076 PetscInt lda; 1077 PetscErrorCode ierr; 1078 1079 PetscFunctionBegin; 1080 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1081 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1082 if (!a->cvec) { 1083 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1084 ierr = 
PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1085 } 1086 a->vecinuse = col + 1; 1087 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1088 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1089 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1090 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1091 *v = a->cvec; 1092 PetscFunctionReturn(0); 1093 } 1094 1095 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1096 { 1097 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1102 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1103 a->vecinuse = 0; 1104 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1105 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1106 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1107 *v = NULL; 1108 PetscFunctionReturn(0); 1109 } 1110 1111 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1112 { 1113 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1114 PetscErrorCode ierr; 1115 PetscInt lda; 1116 1117 PetscFunctionBegin; 1118 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1119 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1120 if (!a->cvec) { 1121 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1122 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1123 } 1124 a->vecinuse = col + 1; 1125 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1126 ierr = 
MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1127 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1128 *v = a->cvec; 1129 PetscFunctionReturn(0); 1130 } 1131 1132 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1133 { 1134 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1139 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1140 a->vecinuse = 0; 1141 ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1142 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1143 *v = NULL; 1144 PetscFunctionReturn(0); 1145 } 1146 1147 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1148 { 1149 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1154 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1155 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1160 { 1161 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1162 PetscErrorCode ierr; 1163 1164 PetscFunctionBegin; 1165 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1166 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1167 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 static 
PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1172 { 1173 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1174 PetscErrorCode ierr; 1175 1176 PetscFunctionBegin; 1177 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1178 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1179 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1184 { 1185 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1186 PetscErrorCode ierr; 1187 1188 PetscFunctionBegin; 1189 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1194 { 1195 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1196 PetscErrorCode ierr; 1197 1198 PetscFunctionBegin; 1199 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1204 { 1205 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1214 { 1215 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1216 PetscErrorCode ierr; 1217 1218 PetscFunctionBegin; 1219 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1224 { 1225 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 ierr = 
MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1234 { 1235 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1244 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1245 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1246 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1247 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1248 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1249 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1250 1251 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1252 { 1253 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1258 if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1259 if (d->A) { 1260 ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1261 } 1262 mat->boundtocpu = bind; 1263 if (!bind) { 1264 PetscBool iscuda; 1265 1266 ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1267 if (!iscuda) { 1268 ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1269 } 1270 ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1271 if (!iscuda) { 1272 ierr = MatDestroy(&d->cmat);CHKERRQ(ierr); 1273 } 1274 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1280 } else { 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1287 } 1288 if (d->cmat) { 1289 ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr); 1290 } 1291 PetscFunctionReturn(0); 1292 } 1293 1294 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1295 { 1296 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1297 PetscErrorCode ierr; 1298 PetscBool iscuda; 1299 1300 
PetscFunctionBegin; 1301 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1302 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1303 if (!iscuda) PetscFunctionReturn(0); 1304 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1305 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1306 if (!d->A) { 1307 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1308 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1309 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1310 } 1311 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1312 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1313 A->preallocated = PETSC_TRUE; 1314 PetscFunctionReturn(0); 1315 } 1316 #endif 1317 1318 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1319 { 1320 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1329 { 1330 PetscFunctionBegin; 1331 *missing = PETSC_FALSE; 1332 PetscFunctionReturn(0); 1333 } 1334 1335 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1336 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1337 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1338 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1339 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1340 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1341 1342 /* -------------------------------------------------------------------*/ 1343 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1344 MatGetRow_MPIDense, 1345 MatRestoreRow_MPIDense, 1346 MatMult_MPIDense, 1347 /* 4*/ 
MatMultAdd_MPIDense, 1348 MatMultTranspose_MPIDense, 1349 MatMultTransposeAdd_MPIDense, 1350 0, 1351 0, 1352 0, 1353 /* 10*/ 0, 1354 0, 1355 0, 1356 0, 1357 MatTranspose_MPIDense, 1358 /* 15*/ MatGetInfo_MPIDense, 1359 MatEqual_MPIDense, 1360 MatGetDiagonal_MPIDense, 1361 MatDiagonalScale_MPIDense, 1362 MatNorm_MPIDense, 1363 /* 20*/ MatAssemblyBegin_MPIDense, 1364 MatAssemblyEnd_MPIDense, 1365 MatSetOption_MPIDense, 1366 MatZeroEntries_MPIDense, 1367 /* 24*/ MatZeroRows_MPIDense, 1368 0, 1369 0, 1370 0, 1371 0, 1372 /* 29*/ MatSetUp_MPIDense, 1373 0, 1374 0, 1375 MatGetDiagonalBlock_MPIDense, 1376 0, 1377 /* 34*/ MatDuplicate_MPIDense, 1378 0, 1379 0, 1380 0, 1381 0, 1382 /* 39*/ MatAXPY_MPIDense, 1383 MatCreateSubMatrices_MPIDense, 1384 0, 1385 MatGetValues_MPIDense, 1386 0, 1387 /* 44*/ 0, 1388 MatScale_MPIDense, 1389 MatShift_Basic, 1390 0, 1391 0, 1392 /* 49*/ MatSetRandom_MPIDense, 1393 0, 1394 0, 1395 0, 1396 0, 1397 /* 54*/ 0, 1398 0, 1399 0, 1400 0, 1401 0, 1402 /* 59*/ MatCreateSubMatrix_MPIDense, 1403 MatDestroy_MPIDense, 1404 MatView_MPIDense, 1405 0, 1406 0, 1407 /* 64*/ 0, 1408 0, 1409 0, 1410 0, 1411 0, 1412 /* 69*/ 0, 1413 0, 1414 0, 1415 0, 1416 0, 1417 /* 74*/ 0, 1418 0, 1419 0, 1420 0, 1421 0, 1422 /* 79*/ 0, 1423 0, 1424 0, 1425 0, 1426 /* 83*/ MatLoad_MPIDense, 1427 0, 1428 0, 1429 0, 1430 0, 1431 0, 1432 /* 89*/ 0, 1433 0, 1434 0, 1435 0, 1436 0, 1437 /* 94*/ 0, 1438 0, 1439 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1440 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1441 0, 1442 /* 99*/ MatProductSetFromOptions_MPIDense, 1443 0, 1444 0, 1445 MatConjugate_MPIDense, 1446 0, 1447 /*104*/ 0, 1448 MatRealPart_MPIDense, 1449 MatImaginaryPart_MPIDense, 1450 0, 1451 0, 1452 /*109*/ 0, 1453 0, 1454 0, 1455 MatGetColumnVector_MPIDense, 1456 MatMissingDiagonal_MPIDense, 1457 /*114*/ 0, 1458 0, 1459 0, 1460 0, 1461 0, 1462 /*119*/ 0, 1463 0, 1464 0, 1465 0, 1466 0, 1467 /*124*/ 0, 1468 MatGetColumnNorms_MPIDense, 1469 0, 1470 0, 1471 0, 1472 /*129*/ 
0, 1473 0, 1474 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1475 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1476 0, 1477 /*134*/ 0, 1478 0, 1479 0, 1480 0, 1481 0, 1482 /*139*/ 0, 1483 0, 1484 0, 1485 0, 1486 0, 1487 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1488 /*145*/ 0, 1489 0, 1490 0 1491 }; 1492 1493 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1494 { 1495 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1496 PetscBool iscuda; 1497 PetscErrorCode ierr; 1498 1499 PetscFunctionBegin; 1500 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1501 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1502 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1503 if (!a->A) { 1504 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1505 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1506 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1507 } 1508 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1509 ierr = MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1510 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1511 mat->preallocated = PETSC_TRUE; 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #if defined(PETSC_HAVE_ELEMENTAL) 1516 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1517 { 1518 Mat mat_elemental; 1519 PetscErrorCode ierr; 1520 PetscScalar *v; 1521 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1522 1523 PetscFunctionBegin; 1524 if (reuse == MAT_REUSE_MATRIX) { 1525 mat_elemental = *newmat; 1526 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1527 } else { 1528 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1529 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1530 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1531 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1532 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1533 } 1534 1535 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1536 for (i=0; i<N; i++) cols[i] = i; 1537 for (i=0; i<m; i++) rows[i] = rstart + i; 1538 1539 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1540 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1541 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1542 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1543 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1544 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1545 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1546 1547 if (reuse == MAT_INPLACE_MATRIX) { 1548 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1549 } else { 1550 *newmat = mat_elemental; 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554 #endif 1555 1556 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt 
col,PetscScalar **vals) 1557 { 1558 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1567 { 1568 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1569 PetscErrorCode ierr; 1570 1571 PetscFunctionBegin; 1572 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 1576 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1577 { 1578 PetscErrorCode ierr; 1579 Mat_MPIDense *mat; 1580 PetscInt m,nloc,N; 1581 1582 PetscFunctionBegin; 1583 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1584 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1585 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1586 PetscInt sum; 1587 1588 if (n == PETSC_DECIDE) { 1589 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1590 } 1591 /* Check sum(n) = N */ 1592 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1593 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1594 1595 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1596 } 1597 1598 /* numeric phase */ 1599 mat = (Mat_MPIDense*)(*outmat)->data; 1600 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1601 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1602 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #if defined(PETSC_HAVE_CUDA) 1607 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1608 { 1609 Mat B; 1610 Mat_MPIDense *m; 1611 PetscErrorCode ierr; 1612 1613 PetscFunctionBegin; 1614 if (reuse == MAT_INITIAL_MATRIX) { 1615 ierr = 
MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1616 } else if (reuse == MAT_REUSE_MATRIX) { 1617 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1618 } 1619 1620 B = *newmat; 1621 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1622 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1623 ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1624 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1626 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1627 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1630 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1632 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1633 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1635 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1638 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr); 1639 m = (Mat_MPIDense*)(B)->data; 1640 if (m->A) { 1641 ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1642 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1643 } 1644 B->ops->bindtocpu = NULL; 1645 B->offloadmask = PETSC_OFFLOAD_CPU; 1646 PetscFunctionReturn(0); 1647 } 1648 1649 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1650 { 1651 Mat B; 1652 Mat_MPIDense *m; 1653 PetscErrorCode ierr; 1654 1655 PetscFunctionBegin; 1656 if (reuse == MAT_INITIAL_MATRIX) { 1657 ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1658 } else if (reuse == MAT_REUSE_MATRIX) { 1659 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1660 } 1661 1662 B = *newmat; 1663 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1664 ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1665 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1672 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1680 m = (Mat_MPIDense*)(B)->data; 1681 if (m->A) { 1682 ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1683 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1684 B->offloadmask = PETSC_OFFLOAD_BOTH; 1685 } else { 1686 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1687 } 1688 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1689 1690 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1691 PetscFunctionReturn(0); 1692 } 1693 #endif 1694 1695 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1696 { 1697 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1698 PetscErrorCode ierr; 1699 PetscInt lda; 1700 1701 PetscFunctionBegin; 1702 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1703 if (a->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1704 if (!a->cvec) { 1705 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1706 } 1707 a->vecinuse = col + 1; 1708 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1709 ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1710 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1711 *v = a->cvec; 1712 PetscFunctionReturn(0); 1713 } 1714 1715 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1716 { 1717 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1718 PetscErrorCode ierr; 1719 1720 PetscFunctionBegin; 1721 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1722 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1723 a->vecinuse = 0; 1724 ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1725 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1726 *v = NULL; 1727 PetscFunctionReturn(0); 1728 } 1729 1730 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1731 { 1732 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1733 PetscErrorCode ierr; 1734 PetscInt lda; 1735 1736 PetscFunctionBegin; 1737 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1738 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1739 if (!a->cvec) { 1740 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1741 } 1742 a->vecinuse = col + 1; 1743 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1744 ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1745 ierr = VecPlaceArray(a->cvec,a->ptrinuse + 
(size_t)col * (size_t)lda);CHKERRQ(ierr); 1746 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1747 *v = a->cvec; 1748 PetscFunctionReturn(0); 1749 } 1750 1751 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1752 { 1753 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1754 PetscErrorCode ierr; 1755 1756 PetscFunctionBegin; 1757 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1758 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1759 a->vecinuse = 0; 1760 ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1761 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1762 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1763 *v = NULL; 1764 PetscFunctionReturn(0); 1765 } 1766 1767 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1768 { 1769 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1770 PetscErrorCode ierr; 1771 PetscInt lda; 1772 1773 PetscFunctionBegin; 1774 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1775 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1776 if (!a->cvec) { 1777 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1778 } 1779 a->vecinuse = col + 1; 1780 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1781 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1782 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1783 *v = a->cvec; 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1788 { 1789 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1790 PetscErrorCode ierr; 1791 1792 PetscFunctionBegin; 1793 if (!a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1794 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1795 a->vecinuse = 0; 1796 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1797 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1798 *v = NULL; 1799 PetscFunctionReturn(0); 1800 } 1801 1802 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1803 { 1804 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1805 Mat_MPIDense *c; 1806 PetscErrorCode ierr; 1807 MPI_Comm comm; 1808 PetscBool setup = PETSC_FALSE; 1809 1810 PetscFunctionBegin; 1811 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1812 if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1813 if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1814 if (!a->cmat) { 1815 setup = PETSC_TRUE; 1816 ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr); 1817 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr); 1818 ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1819 ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr); 1820 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1821 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1822 } else if (cend-cbegin != a->cmat->cmap->N) { 1823 setup = PETSC_TRUE; 1824 ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr); 1825 ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr); 1826 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1827 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1828 } 1829 c = (Mat_MPIDense*)a->cmat->data; 1830 if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1831 ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr); 1832 if 
(setup) { /* do we really need this? */ 1833 ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr); 1834 } 1835 a->cmat->preallocated = PETSC_TRUE; 1836 a->cmat->assembled = PETSC_TRUE; 1837 a->matinuse = cbegin + 1; 1838 *v = a->cmat; 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1843 { 1844 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1845 Mat_MPIDense *c; 1846 PetscErrorCode ierr; 1847 1848 PetscFunctionBegin; 1849 if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1850 if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1851 if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1852 a->matinuse = 0; 1853 c = (Mat_MPIDense*)a->cmat->data; 1854 ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr); 1855 *v = NULL; 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1860 { 1861 Mat_MPIDense *a; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1866 mat->data = (void*)a; 1867 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1868 1869 mat->insertmode = NOT_SET_VALUES; 1870 1871 /* build cache for off array entries formed */ 1872 a->donotstash = PETSC_FALSE; 1873 1874 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1875 1876 /* stuff used for matrix vector multiply */ 1877 a->lvec = 0; 1878 a->Mvctx = 0; 1879 a->roworiented = PETSC_TRUE; 1880 1881 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1882 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr); 1883 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1884 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1885 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1886 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1887 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 1888 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1889 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1890 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1891 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 1892 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1893 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1894 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1895 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1896 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1897 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1898 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr); 1899 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr); 1900 #if defined(PETSC_HAVE_ELEMENTAL) 1901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1902 #endif 1903 #if defined(PETSC_HAVE_SCALAPACK) 1904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr); 1905 #endif 1906 #if defined(PETSC_HAVE_CUDA) 1907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1908 #endif 1909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1912 #if defined(PETSC_HAVE_CUDA) 1913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1915 #endif 1916 1917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1919 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1920 
PetscFunctionReturn(0); 1921 } 1922 1923 /*MC 1924 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1925 1926 Options Database Keys: 1927 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1928 1929 Level: beginner 1930 1931 .seealso: 1932 1933 M*/ 1934 #if defined(PETSC_HAVE_CUDA) 1935 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1936 { 1937 PetscErrorCode ierr; 1938 1939 PetscFunctionBegin; 1940 ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1941 ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 #endif 1945 1946 /*MC 1947 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1948 1949 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1950 and MATMPIDENSE otherwise. 1951 1952 Options Database Keys: 1953 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1954 1955 Level: beginner 1956 1957 1958 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 1959 M*/ 1960 1961 /*MC 1962 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1963 1964 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1965 and MATMPIDENSECUDA otherwise. 1966 1967 Options Database Keys: 1968 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1969 1970 Level: beginner 1971 1972 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1973 M*/ 1974 1975 /*@C 1976 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1977 1978 Collective 1979 1980 Input Parameters: 1981 . B - the matrix 1982 - data - optional location of matrix data. Set data=NULL for PETSc 1983 to control all matrix memory allocation. 
1984 1985 Notes: 1986 The dense format is fully compatible with standard Fortran 77 1987 storage by columns. 1988 1989 The data input variable is intended primarily for Fortran programmers 1990 who wish to allocate their own matrix memory space. Most users should 1991 set data=NULL. 1992 1993 Level: intermediate 1994 1995 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1996 @*/ 1997 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1998 { 1999 PetscErrorCode ierr; 2000 2001 PetscFunctionBegin; 2002 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2003 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 2004 PetscFunctionReturn(0); 2005 } 2006 2007 /*@ 2008 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 2009 array provided by the user. This is useful to avoid copying an array 2010 into a matrix 2011 2012 Not Collective 2013 2014 Input Parameters: 2015 + mat - the matrix 2016 - array - the array in column major order 2017 2018 Notes: 2019 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2020 freed when the matrix is destroyed. 
2021 2022 Level: developer 2023 2024 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2025 2026 @*/ 2027 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2028 { 2029 PetscErrorCode ierr; 2030 2031 PetscFunctionBegin; 2032 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2033 ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2034 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2035 #if defined(PETSC_HAVE_CUDA) 2036 mat->offloadmask = PETSC_OFFLOAD_CPU; 2037 #endif 2038 PetscFunctionReturn(0); 2039 } 2040 2041 /*@ 2042 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2043 2044 Not Collective 2045 2046 Input Parameters: 2047 . mat - the matrix 2048 2049 Notes: 2050 You can only call this after a call to MatDensePlaceArray() 2051 2052 Level: developer 2053 2054 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2055 2056 @*/ 2057 PetscErrorCode MatDenseResetArray(Mat mat) 2058 { 2059 PetscErrorCode ierr; 2060 2061 PetscFunctionBegin; 2062 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2063 ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2064 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /*@ 2069 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2070 array provided by the user. This is useful to avoid copying an array 2071 into a matrix 2072 2073 Not Collective 2074 2075 Input Parameters: 2076 + mat - the matrix 2077 - array - the array in column major order 2078 2079 Notes: 2080 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2081 freed by the user. It will be freed when the matrix is destroyed. 
2082 2083 Level: developer 2084 2085 .seealso: MatDenseGetArray(), VecReplaceArray() 2086 @*/ 2087 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2088 { 2089 PetscErrorCode ierr; 2090 2091 PetscFunctionBegin; 2092 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2093 ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2094 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2095 #if defined(PETSC_HAVE_CUDA) 2096 mat->offloadmask = PETSC_OFFLOAD_CPU; 2097 #endif 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #if defined(PETSC_HAVE_CUDA) 2102 /*@C 2103 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2104 array provided by the user. This is useful to avoid copying an array 2105 into a matrix 2106 2107 Not Collective 2108 2109 Input Parameters: 2110 + mat - the matrix 2111 - array - the array in column major order 2112 2113 Notes: 2114 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2115 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2116 2117 Level: developer 2118 2119 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 2120 @*/ 2121 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2122 { 2123 PetscErrorCode ierr; 2124 2125 PetscFunctionBegin; 2126 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2127 ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2128 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2129 mat->offloadmask = PETSC_OFFLOAD_GPU; 2130 PetscFunctionReturn(0); 2131 } 2132 2133 /*@C 2134 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2135 2136 Not Collective 2137 2138 Input Parameters: 2139 . 
mat - the matrix 2140 2141 Notes: 2142 You can only call this after a call to MatDenseCUDAPlaceArray() 2143 2144 Level: developer 2145 2146 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2147 2148 @*/ 2149 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2150 { 2151 PetscErrorCode ierr; 2152 2153 PetscFunctionBegin; 2154 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2155 ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2156 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 /*@C 2161 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2162 array provided by the user. This is useful to avoid copying an array 2163 into a matrix 2164 2165 Not Collective 2166 2167 Input Parameters: 2168 + mat - the matrix 2169 - array - the array in column major order 2170 2171 Notes: 2172 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2173 The memory passed in CANNOT be freed by the user. It will be freed 2174 when the matrix is destroyed. The array should respect the matrix leading dimension. 2175 2176 Level: developer 2177 2178 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2179 @*/ 2180 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2181 { 2182 PetscErrorCode ierr; 2183 2184 PetscFunctionBegin; 2185 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2186 ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2187 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2188 mat->offloadmask = PETSC_OFFLOAD_GPU; 2189 PetscFunctionReturn(0); 2190 } 2191 2192 /*@C 2193 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2194 2195 Not Collective 2196 2197 Input Parameters: 2198 . A - the matrix 2199 2200 Output Parameters 2201 . 
array - the GPU array in column major order 2202 2203 Notes: 2204 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2205 2206 Level: developer 2207 2208 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2209 @*/ 2210 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2211 { 2212 PetscErrorCode ierr; 2213 2214 PetscFunctionBegin; 2215 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2216 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2217 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 /*@C 2222 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2223 2224 Not Collective 2225 2226 Input Parameters: 2227 + A - the matrix 2228 - array - the GPU array in column major order 2229 2230 Notes: 2231 2232 Level: developer 2233 2234 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2235 @*/ 2236 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2237 { 2238 PetscErrorCode ierr; 2239 2240 PetscFunctionBegin; 2241 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2242 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2243 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2244 A->offloadmask = PETSC_OFFLOAD_GPU; 2245 PetscFunctionReturn(0); 2246 } 2247 2248 /*@C 2249 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2250 2251 Not Collective 2252 2253 Input Parameters: 2254 . A - the matrix 2255 2256 Output Parameters 2257 . array - the GPU array in column major order 2258 2259 Notes: 2260 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2261 2262 Level: developer 2263 2264 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2265 @*/ 2266 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2267 { 2268 PetscErrorCode ierr; 2269 2270 PetscFunctionBegin; 2271 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2272 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2273 PetscFunctionReturn(0); 2274 } 2275 2276 /*@C 2277 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2278 2279 Not Collective 2280 2281 Input Parameters: 2282 + A - the matrix 2283 - array - the GPU array in column major order 2284 2285 Notes: 2286 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2287 2288 Level: developer 2289 2290 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2291 @*/ 2292 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2293 { 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 /*@C 2302 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2303 2304 Not Collective 2305 2306 Input Parameters: 2307 . A - the matrix 2308 2309 Output Parameters 2310 . array - the GPU array in column major order 2311 2312 Notes: 2313 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2314 2315 Level: developer 2316 2317 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2318 @*/ 2319 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2320 { 2321 PetscErrorCode ierr; 2322 2323 PetscFunctionBegin; 2324 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2325 ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2326 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2327 PetscFunctionReturn(0); 2328 } 2329 2330 /*@C 2331 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 
2332 2333 Not Collective 2334 2335 Input Parameters: 2336 + A - the matrix 2337 - array - the GPU array in column major order 2338 2339 Notes: 2340 2341 Level: developer 2342 2343 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2344 @*/ 2345 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2346 { 2347 PetscErrorCode ierr; 2348 2349 PetscFunctionBegin; 2350 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2351 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2352 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2353 A->offloadmask = PETSC_OFFLOAD_GPU; 2354 PetscFunctionReturn(0); 2355 } 2356 #endif 2357 2358 /*@C 2359 MatCreateDense - Creates a matrix in dense format. 2360 2361 Collective 2362 2363 Input Parameters: 2364 + comm - MPI communicator 2365 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2366 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2367 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2368 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2369 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2370 to control all matrix memory allocation. 2371 2372 Output Parameter: 2373 . A - the matrix 2374 2375 Notes: 2376 The dense format is fully compatible with standard Fortran 77 2377 storage by columns. 2378 2379 The data input variable is intended primarily for Fortran programmers 2380 who wish to allocate their own matrix memory space. Most users should 2381 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2382 2383 The user MUST specify either the local or global matrix dimensions 2384 (possibly both). 
2385 2386 Level: intermediate 2387 2388 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2389 @*/ 2390 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2391 { 2392 PetscErrorCode ierr; 2393 PetscMPIInt size; 2394 2395 PetscFunctionBegin; 2396 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2397 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2398 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2399 if (size > 1) { 2400 PetscBool havedata = (PetscBool)!!data; 2401 2402 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2403 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2404 ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 2405 if (havedata) { /* user provided data array, so no need to assemble */ 2406 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2407 (*A)->assembled = PETSC_TRUE; 2408 } 2409 } else { 2410 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2411 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2412 } 2413 PetscFunctionReturn(0); 2414 } 2415 2416 #if defined(PETSC_HAVE_CUDA) 2417 /*@C 2418 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2419 2420 Collective 2421 2422 Input Parameters: 2423 + comm - MPI communicator 2424 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2425 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2426 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2427 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2428 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2429 to control matrix memory allocation. 2430 2431 Output Parameter: 2432 . 
A - the matrix 2433 2434 Notes: 2435 2436 Level: intermediate 2437 2438 .seealso: MatCreate(), MatCreateDense() 2439 @*/ 2440 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2441 { 2442 PetscErrorCode ierr; 2443 PetscMPIInt size; 2444 2445 PetscFunctionBegin; 2446 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2447 PetscValidLogicalCollectiveBool(*A,!!data,6); 2448 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2449 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2450 if (size > 1) { 2451 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2452 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2453 if (data) { /* user provided data array, so no need to assemble */ 2454 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2455 (*A)->assembled = PETSC_TRUE; 2456 } 2457 } else { 2458 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2459 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2460 } 2461 PetscFunctionReturn(0); 2462 } 2463 #endif 2464 2465 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2466 { 2467 Mat mat; 2468 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2469 PetscErrorCode ierr; 2470 2471 PetscFunctionBegin; 2472 *newmat = 0; 2473 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2474 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2475 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2476 a = (Mat_MPIDense*)mat->data; 2477 2478 mat->factortype = A->factortype; 2479 mat->assembled = PETSC_TRUE; 2480 mat->preallocated = PETSC_TRUE; 2481 2482 mat->insertmode = NOT_SET_VALUES; 2483 a->donotstash = oldmat->donotstash; 2484 2485 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 2486 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 2487 2488 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2489 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2490 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2491 2492 *newmat = mat; 2493 PetscFunctionReturn(0); 2494 } 2495 2496 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2497 { 2498 PetscErrorCode ierr; 2499 PetscBool isbinary; 2500 #if defined(PETSC_HAVE_HDF5) 2501 PetscBool ishdf5; 2502 #endif 2503 2504 PetscFunctionBegin; 2505 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2506 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2507 /* force binary viewer to load .info file if it has not yet done so */ 2508 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2509 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2510 #if defined(PETSC_HAVE_HDF5) 2511 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2512 #endif 2513 if (isbinary) { 2514 ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2515 #if defined(PETSC_HAVE_HDF5) 2516 } else if (ishdf5) { 2517 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2518 #endif 2519 } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2524 { 2525 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2526 Mat a,b; 2527 PetscBool flg; 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 a = matA->A; 2532 b = matB->A; 2533 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2534 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2539 { 2540 PetscErrorCode ierr; 2541 Mat_TransMatMultDense *atb = 
(Mat_TransMatMultDense *)data; 2542 2543 PetscFunctionBegin; 2544 ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2545 ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2546 ierr = PetscFree(atb);CHKERRQ(ierr); 2547 PetscFunctionReturn(0); 2548 } 2549 2550 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2551 { 2552 PetscErrorCode ierr; 2553 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2554 2555 PetscFunctionBegin; 2556 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2557 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2558 ierr = PetscFree(abt);CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2563 { 2564 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2565 Mat_TransMatMultDense *atb; 2566 PetscErrorCode ierr; 2567 MPI_Comm comm; 2568 PetscMPIInt size,*recvcounts; 2569 PetscScalar *carray,*sendbuf; 2570 const PetscScalar *atbarray; 2571 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2572 const PetscInt *ranges; 2573 2574 PetscFunctionBegin; 2575 MatCheckProduct(C,3); 2576 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2577 atb = (Mat_TransMatMultDense *)C->product->data; 2578 recvcounts = atb->recvcounts; 2579 sendbuf = atb->sendbuf; 2580 2581 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2582 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2583 2584 /* compute atbarray = aseq^T * bseq */ 2585 ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? 
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2586 2587 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2588 2589 /* arrange atbarray into sendbuf */ 2590 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2591 for (proc=0, k=0; proc<size; proc++) { 2592 for (j=0; j<cN; j++) { 2593 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2594 } 2595 } 2596 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2597 2598 /* sum all atbarray to local values of C */ 2599 ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 2600 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 2601 ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 2602 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2603 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2604 PetscFunctionReturn(0); 2605 } 2606 2607 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2608 { 2609 PetscErrorCode ierr; 2610 MPI_Comm comm; 2611 PetscMPIInt size; 2612 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2613 Mat_TransMatMultDense *atb; 2614 PetscBool cisdense; 2615 PetscInt i; 2616 const PetscInt *ranges; 2617 2618 PetscFunctionBegin; 2619 MatCheckProduct(C,3); 2620 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2621 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2622 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2623 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2624 } 2625 2626 /* create matrix product C */ 2627 ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2628 ierr = 
PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 2629 if (!cisdense) { 2630 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2631 } 2632 ierr = MatSetUp(C);CHKERRQ(ierr); 2633 2634 /* create data structure for reuse C */ 2635 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2636 ierr = PetscNew(&atb);CHKERRQ(ierr); 2637 cM = C->rmap->N; 2638 ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2639 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2640 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2641 2642 C->product->data = atb; 2643 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2644 PetscFunctionReturn(0); 2645 } 2646 2647 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2648 { 2649 PetscErrorCode ierr; 2650 MPI_Comm comm; 2651 PetscMPIInt i, size; 2652 PetscInt maxRows, bufsiz; 2653 PetscMPIInt tag; 2654 PetscInt alg; 2655 Mat_MatTransMultDense *abt; 2656 Mat_Product *product = C->product; 2657 PetscBool flg; 2658 2659 PetscFunctionBegin; 2660 MatCheckProduct(C,4); 2661 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2662 /* check local size of A and B */ 2663 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2664 2665 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2666 alg = flg ? 
0 : 1; 2667 2668 /* setup matrix product C */ 2669 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2670 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2671 ierr = MatSetUp(C);CHKERRQ(ierr); 2672 ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 2673 2674 /* create data structure for reuse C */ 2675 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2676 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2677 ierr = PetscNew(&abt);CHKERRQ(ierr); 2678 abt->tag = tag; 2679 abt->alg = alg; 2680 switch (alg) { 2681 case 1: /* alg: "cyclic" */ 2682 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2683 bufsiz = A->cmap->N * maxRows; 2684 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2685 break; 2686 default: /* alg: "allgatherv" */ 2687 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2688 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2689 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2690 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2691 break; 2692 } 2693 2694 C->product->data = abt; 2695 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2696 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2697 PetscFunctionReturn(0); 2698 } 2699 2700 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2701 { 2702 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2703 Mat_MatTransMultDense *abt; 2704 PetscErrorCode ierr; 2705 MPI_Comm comm; 2706 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2707 PetscScalar *sendbuf, *recvbuf=0, *cv; 2708 PetscInt i,cK=A->cmap->N,k,j,bn; 2709 PetscScalar 
_DOne=1.0,_DZero=0.0; 2710 const PetscScalar *av,*bv; 2711 PetscBLASInt cm, cn, ck, alda, blda, clda; 2712 MPI_Request reqs[2]; 2713 const PetscInt *ranges; 2714 2715 PetscFunctionBegin; 2716 MatCheckProduct(C,3); 2717 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2718 abt = (Mat_MatTransMultDense*)C->product->data; 2719 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2720 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2721 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2722 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2723 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2724 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2725 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2727 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2729 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2731 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2732 bn = B->rmap->n; 2733 if (blda == bn) { 2734 sendbuf = (PetscScalar*)bv; 2735 } else { 2736 sendbuf = abt->buf[0]; 2737 for (k = 0, i = 0; i < cK; i++) { 2738 for (j = 0; j < bn; j++, k++) { 2739 sendbuf[k] = bv[i * blda + j]; 2740 } 2741 } 2742 } 2743 if (size > 1) { 2744 sendto = (rank + size - 1) % size; 2745 recvfrom = (rank + size + 1) % size; 2746 } else { 2747 sendto = recvfrom = 0; 2748 } 2749 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2750 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2751 recvisfrom = rank; 2752 for (i = 0; i < size; i++) { 2753 /* we have finished receiving in sending, bufs can be read/modified */ 2754 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2755 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2756 2757 if (nextrecvisfrom != rank) { 2758 /* start the cyclic 
sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2759 sendsiz = cK * bn; 2760 recvsiz = cK * nextbn; 2761 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2762 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2763 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2764 } 2765 2766 /* local aseq * sendbuf^T */ 2767 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2768 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2769 2770 if (nextrecvisfrom != rank) { 2771 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2772 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2773 } 2774 bn = nextbn; 2775 recvisfrom = nextrecvisfrom; 2776 sendbuf = recvbuf; 2777 } 2778 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2779 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2780 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2781 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2787 { 2788 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2789 Mat_MatTransMultDense *abt; 2790 PetscErrorCode ierr; 2791 MPI_Comm comm; 2792 PetscMPIInt size; 2793 PetscScalar *cv, *sendbuf, *recvbuf; 2794 const PetscScalar *av,*bv; 2795 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2796 PetscScalar _DOne=1.0,_DZero=0.0; 2797 PetscBLASInt cm, cn, ck, alda, clda; 2798 2799 PetscFunctionBegin; 2800 MatCheckProduct(C,3); 2801 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2802 abt = 
(Mat_MatTransMultDense*)C->product->data; 2803 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2804 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2805 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2806 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2807 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2808 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2809 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2810 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2811 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2812 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2813 /* copy transpose of B into buf[0] */ 2814 bn = B->rmap->n; 2815 sendbuf = abt->buf[0]; 2816 recvbuf = abt->buf[1]; 2817 for (k = 0, j = 0; j < bn; j++) { 2818 for (i = 0; i < cK; i++, k++) { 2819 sendbuf[k] = bv[i * blda + j]; 2820 } 2821 } 2822 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2823 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2824 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2825 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2826 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2827 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2828 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2829 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2830 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2831 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2832 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2837 { 2838 Mat_MatTransMultDense *abt; 2839 PetscErrorCode ierr; 2840 2841 PetscFunctionBegin; 2842 MatCheckProduct(C,3); 2843 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2844 abt = (Mat_MatTransMultDense*)C->product->data; 2845 switch (abt->alg) { 2846 case 1: 2847 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2848 break; 2849 default: 2850 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2851 break; 2852 } 2853 PetscFunctionReturn(0); 2854 } 2855 2856 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2857 { 2858 PetscErrorCode ierr; 2859 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2860 2861 PetscFunctionBegin; 2862 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2863 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2864 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2865 ierr = PetscFree(ab);CHKERRQ(ierr); 2866 PetscFunctionReturn(0); 2867 } 2868 2869 #if defined(PETSC_HAVE_ELEMENTAL) 2870 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2871 { 2872 PetscErrorCode ierr; 2873 Mat_MatMultDense *ab; 2874 2875 PetscFunctionBegin; 2876 MatCheckProduct(C,3); 2877 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2878 ab = (Mat_MatMultDense*)C->product->data; 2879 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2880 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2881 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2882 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2883 PetscFunctionReturn(0); 2884 } 2885 2886 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2887 { 2888 PetscErrorCode ierr; 2889 Mat Ae,Be,Ce; 2890 Mat_MatMultDense *ab; 2891 2892 PetscFunctionBegin; 2893 MatCheckProduct(C,4); 2894 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2895 /* check local size of A 
and B */ 2896 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2897 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2898 } 2899 2900 /* create elemental matrices Ae and Be */ 2901 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2902 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2903 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2904 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2905 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2906 2907 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2908 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2909 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2910 ierr = MatSetUp(Be);CHKERRQ(ierr); 2911 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2912 2913 /* compute symbolic Ce = Ae*Be */ 2914 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2915 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2916 2917 /* setup C */ 2918 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2919 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2920 ierr = MatSetUp(C);CHKERRQ(ierr); 2921 2922 /* create data structure for reuse Cdense */ 2923 ierr = PetscNew(&ab);CHKERRQ(ierr); 2924 ab->Ae = Ae; 2925 ab->Be = Be; 2926 ab->Ce = Ce; 2927 2928 C->product->data = ab; 2929 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2930 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2931 PetscFunctionReturn(0); 2932 } 2933 #endif 2934 /* ----------------------------------------------- */ 2935 #if defined(PETSC_HAVE_ELEMENTAL) 2936 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2937 { 2938 PetscFunctionBegin; 2939 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2940 C->ops->productsymbolic = MatProductSymbolic_AB; 2941 PetscFunctionReturn(0); 2942 } 2943 #endif 2944 2945 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2946 { 2947 Mat_Product *product = C->product; 2948 Mat A = product->A,B=product->B; 2949 2950 PetscFunctionBegin; 2951 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2952 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2953 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2954 C->ops->productsymbolic = MatProductSymbolic_AtB; 2955 PetscFunctionReturn(0); 2956 } 2957 2958 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2959 { 2960 PetscErrorCode ierr; 2961 Mat_Product *product = C->product; 2962 const char *algTypes[2] = {"allgatherv","cyclic"}; 2963 PetscInt alg,nalg = 2; 2964 PetscBool flg = PETSC_FALSE; 2965 2966 PetscFunctionBegin; 2967 /* Set default algorithm */ 2968 alg = 0; /* default is allgatherv */ 2969 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2970 if (flg) { 2971 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2972 } 2973 2974 /* Get runtime option */ 2975 if (product->api_user) { 2976 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2977 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2978 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2979 } else { 2980 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2981 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic 
approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2982 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2983 } 2984 if (flg) { 2985 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2986 } 2987 2988 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2989 C->ops->productsymbolic = MatProductSymbolic_ABt; 2990 PetscFunctionReturn(0); 2991 } 2992 2993 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2994 { 2995 PetscErrorCode ierr; 2996 Mat_Product *product = C->product; 2997 2998 PetscFunctionBegin; 2999 switch (product->type) { 3000 #if defined(PETSC_HAVE_ELEMENTAL) 3001 case MATPRODUCT_AB: 3002 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 3003 break; 3004 #endif 3005 case MATPRODUCT_AtB: 3006 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 3007 break; 3008 case MATPRODUCT_ABt: 3009 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 3010 break; 3011 default: 3012 break; 3013 } 3014 PetscFunctionReturn(0); 3015 } 3016