1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else { 36 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); 37 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 48 49 PetscFunctionBegin; 50 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 51 lrow = row - rstart; 52 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 57 { 58 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 59 PetscErrorCode ierr; 60 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 61 62 PetscFunctionBegin; 63 if (row < rstart || row >= rend) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 64 lrow = row - rstart; 65 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 70 { 71 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 72 PetscErrorCode ierr; 73 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 74 PetscScalar *array; 75 MPI_Comm comm; 76 PetscBool flg; 77 Mat B; 78 79 PetscFunctionBegin; 80 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 81 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 82 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 83 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 84 85 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 86 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 87 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 88 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 89 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 90 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 91 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 92 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 93 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 97 *a = B; 98 ierr = MatDestroy(&B);CHKERRQ(ierr); 99 } else *a = B; 100 PetscFunctionReturn(0); 101 } 102 103 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 104 { 105 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 106 PetscErrorCode ierr; 107 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 108 PetscBool roworiented = A->roworiented; 109 110 PetscFunctionBegin; 111 for (i=0; i<m; i++) { 112 if (idxm[i] < 0) continue; 113 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 114 if (idxm[i] >= rstart && idxm[i] < rend) { 115 row = idxm[i] - rstart; 116 if (roworiented) { 117 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 118 } else { 119 for (j=0; j<n; j++) { 120 if (idxn[j] < 0) continue; 121 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 122 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 123 } 124 } 125 } else if (!A->donotstash) { 126 mat->assembled = PETSC_FALSE; 127 if (roworiented) { 128 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 129 } else { 130 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 131 } 132 } 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 138 { 139 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 140 PetscErrorCode ierr; 141 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 142 143 PetscFunctionBegin; 144 for (i=0; i<m; i++) { 145 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 146 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 147 if (idxm[i] >= rstart && idxm[i] < rend) { 148 row = idxm[i] - rstart; 149 for (j=0; j<n; j++) { 150 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 151 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 152 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 153 } 154 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 160 { 161 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 162 PetscErrorCode ierr; 163 164 PetscFunctionBegin; 165 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 170 { 171 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 172 PetscBool iscuda; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 if (!a->A) { 177 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 178 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 179 ierr = 
PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 180 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 181 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 182 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 183 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 184 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 185 } 186 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 191 { 192 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 193 PetscErrorCode ierr; 194 195 PetscFunctionBegin; 196 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 197 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 219 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 if (a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 230 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 231 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 236 { 237 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 238 PetscErrorCode ierr; 239 240 PetscFunctionBegin; 241 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 242 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 243 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 248 { 249 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 250 PetscErrorCode ierr; 251 252 PetscFunctionBegin; 253 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 254 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 255 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 260 { 261 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 262 PetscErrorCode ierr; 263 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 264 const PetscInt *irow,*icol; 265 const PetscScalar *v; 266 PetscScalar *bv; 267 Mat newmat; 268 IS iscol_local; 269 MPI_Comm comm_is,comm_mat; 270 271 PetscFunctionBegin; 272 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 273 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 274 if (comm_mat != comm_is) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 275 276 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 278 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 279 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 280 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 281 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 282 283 /* No parallel redistribution currently supported! Should really check each index set 284 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 285 original matrix! */ 286 287 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 288 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 289 290 /* Check submatrix call */ 291 if (scall == MAT_REUSE_MATRIX) { 292 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 293 /* Really need to test rows and column sizes! 
*/ 294 newmat = *B; 295 } else { 296 /* Create and fill new matrix */ 297 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 298 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 299 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 300 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 301 } 302 303 /* Now extract the data pointers and do the copy, column at a time */ 304 newmatd = (Mat_MPIDense*)newmat->data; 305 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 306 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 307 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 308 for (i=0; i<Ncols; i++) { 309 const PetscScalar *av = v + lda*icol[i]; 310 for (j=0; j<nrows; j++) { 311 *bv++ = av[irow[j] - rstart]; 312 } 313 } 314 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 315 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 316 317 /* Assemble the matrices so that the correct flags are set */ 318 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 319 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 321 /* Free work space */ 322 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 323 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 324 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 325 *B = newmat; 326 PetscFunctionReturn(0); 327 } 328 329 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 330 { 331 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 332 PetscErrorCode ierr; 333 334 PetscFunctionBegin; 335 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 340 { 341 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 342 PetscErrorCode ierr; 343 344 PetscFunctionBegin; 345 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 346 PetscFunctionReturn(0); 347 } 348 349 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 350 { 351 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 360 { 361 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt nstash,reallocs; 364 365 PetscFunctionBegin; 366 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 367 368 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 369 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 370 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 371 PetscFunctionReturn(0); 372 } 373 374 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 375 { 376 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 377 PetscErrorCode ierr; 378 PetscInt i,*row,*col,flg,j,rstart,ncols; 379 PetscMPIInt n; 380 PetscScalar *val; 381 382 PetscFunctionBegin; 383 if (!mdn->donotstash && !mat->nooffprocentries) { 384 /* wait on receives */ 385 while (1) { 386 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 387 if (!flg) break; 388 389 for (i=0; i<n;) { 390 /* Now identify the consecutive vals belonging to the same row */ 391 for (j=i,rstart=row[j]; j<n; j++) { 392 if (row[j] != rstart) break; 393 } 394 if (j < n) ncols = j-i; 395 else ncols = n-i; 396 /* Now assemble all these values with a single function call */ 397 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 398 i = j; 399 } 400 } 401 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 402 } 403 404 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 405 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 406 407 if (!mat->was_assembled && mode == 
MAT_FINAL_ASSEMBLY) { 408 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 409 } 410 PetscFunctionReturn(0); 411 } 412 413 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 414 { 415 PetscErrorCode ierr; 416 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 417 418 PetscFunctionBegin; 419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 424 { 425 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 426 PetscErrorCode ierr; 427 PetscInt i,len,*lrows; 428 429 PetscFunctionBegin; 430 /* get locally owned rows */ 431 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 432 /* fix right hand side if needed */ 433 if (x && b) { 434 const PetscScalar *xx; 435 PetscScalar *bb; 436 437 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 438 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 439 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 440 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 441 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 442 } 443 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 444 if (diag != 0.0) { 445 Vec d; 446 447 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 448 ierr = VecSet(d,diag);CHKERRQ(ierr); 449 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 450 ierr = VecDestroy(&d);CHKERRQ(ierr); 451 } 452 ierr = PetscFree(lrows);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 460 461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 PetscErrorCode ierr; 465 const PetscScalar *ax; 466 PetscScalar *ay; 467 PetscMemType axmtype,aymtype; 468 
469 PetscFunctionBegin; 470 ierr = VecGetArrayReadInPlace_Internal(xx,&ax,&axmtype);CHKERRQ(ierr); 471 ierr = VecGetArrayInPlace_Internal(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 472 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr); 473 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 474 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 475 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 476 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 483 PetscErrorCode ierr; 484 const PetscScalar *ax; 485 PetscScalar *ay; 486 PetscMemType axmtype,aymtype; 487 488 PetscFunctionBegin; 489 ierr = VecGetArrayReadInPlace_Internal(xx,&ax,&axmtype);CHKERRQ(ierr); 490 ierr = VecGetArrayInPlace_Internal(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 491 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr); 492 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 493 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 494 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 495 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 PetscErrorCode ierr; 503 const PetscScalar *ax; 504 PetscScalar *ay; 505 PetscMemType axmtype,aymtype; 506 507 PetscFunctionBegin; 508 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 509 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 510 ierr = VecGetArrayReadInPlace_Internal(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 511 ierr = VecGetArrayInPlace_Internal(yy,&ay,&aymtype);CHKERRQ(ierr); 512 ierr = 
PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 513 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 514 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 515 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 520 { 521 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 522 PetscErrorCode ierr; 523 const PetscScalar *ax; 524 PetscScalar *ay; 525 PetscMemType axmtype,aymtype; 526 527 PetscFunctionBegin; 528 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 529 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 530 ierr = VecGetArrayReadInPlace_Internal(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 531 ierr = VecGetArrayInPlace_Internal(zz,&ay,&aymtype);CHKERRQ(ierr); 532 ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 533 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 534 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 535 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 540 { 541 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 542 PetscErrorCode ierr; 543 PetscInt lda,len,i,n,m = A->rmap->n,radd; 544 PetscScalar *x,zero = 0.0; 545 const PetscScalar *av; 546 547 PetscFunctionBegin; 548 ierr = VecSet(v,zero);CHKERRQ(ierr); 549 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 550 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 551 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 552 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 553 radd = A->rmap->rstart*m; 554 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 555 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 556 for (i=0; i<len; i++) { 557 x[i] = av[radd + i*lda + i]; 558 } 559 ierr = 
MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 560 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 561 PetscFunctionReturn(0); 562 } 563 564 PetscErrorCode MatDestroy_MPIDense(Mat mat) 565 { 566 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 567 PetscErrorCode ierr; 568 569 PetscFunctionBegin; 570 #if defined(PETSC_USE_LOG) 571 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 572 #endif 573 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 574 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 575 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 576 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 577 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 578 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 579 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 580 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 581 582 ierr = PetscFree(mat->data);CHKERRQ(ierr); 583 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 584 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 592 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 593 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 594 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 595 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 596 #if defined(PETSC_HAVE_ELEMENTAL) 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 598 #endif 599 #if defined(PETSC_HAVE_SCALAPACK) 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 601 #endif 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 604 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 605 #if defined (PETSC_HAVE_CUDA) 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 608 #endif 609 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 610 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 611 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 612 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 613 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 614 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 615 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 616 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 617 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 618 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 622 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 623 624 #include <petscdraw.h> 625 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 626 { 627 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 628 PetscErrorCode ierr; 629 PetscMPIInt rank; 630 PetscViewerType vtype; 631 PetscBool iascii,isdraw; 632 PetscViewer sviewer; 633 PetscViewerFormat format; 634 635 PetscFunctionBegin; 636 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 637 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 638 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 639 if (iascii) { 640 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 641 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 642 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 643 MatInfo info; 644 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 645 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 646 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 647 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 648 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 649 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } else if (format == PETSC_VIEWER_ASCII_INFO) { 652 PetscFunctionReturn(0); 653 } 654 } else if (isdraw) { 655 PetscDraw draw; 656 PetscBool isnull; 657 658 ierr = 
PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 659 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 660 if (isnull) PetscFunctionReturn(0); 661 } 662 663 { 664 /* assemble the entire matrix onto first processor. */ 665 Mat A; 666 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 667 PetscInt *cols; 668 PetscScalar *vals; 669 670 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 671 if (!rank) { 672 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 673 } else { 674 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 675 } 676 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 677 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 678 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 679 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 680 681 /* Copy the matrix ... This isn't the most efficient means, 682 but it's quick for now */ 683 A->insertmode = INSERT_VALUES; 684 685 row = mat->rmap->rstart; 686 m = mdn->A->rmap->n; 687 for (i=0; i<m; i++) { 688 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 689 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 690 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 691 row++; 692 } 693 694 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 696 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 697 if (!rank) { 698 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 699 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 700 } 701 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 702 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 703 ierr = MatDestroy(&A);CHKERRQ(ierr); 704 } 705 PetscFunctionReturn(0); 706 } 707 708 
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 709 { 710 PetscErrorCode ierr; 711 PetscBool iascii,isbinary,isdraw,issocket; 712 713 PetscFunctionBegin; 714 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 715 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 716 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 717 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 718 719 if (iascii || issocket || isdraw) { 720 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 721 } else if (isbinary) { 722 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 728 { 729 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 730 Mat mdn = mat->A; 731 PetscErrorCode ierr; 732 PetscLogDouble isend[5],irecv[5]; 733 734 PetscFunctionBegin; 735 info->block_size = 1.0; 736 737 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 738 739 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 740 isend[3] = info->memory; isend[4] = info->mallocs; 741 if (flag == MAT_LOCAL) { 742 info->nz_used = isend[0]; 743 info->nz_allocated = isend[1]; 744 info->nz_unneeded = isend[2]; 745 info->memory = isend[3]; 746 info->mallocs = isend[4]; 747 } else if (flag == MAT_GLOBAL_MAX) { 748 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 749 750 info->nz_used = irecv[0]; 751 info->nz_allocated = irecv[1]; 752 info->nz_unneeded = irecv[2]; 753 info->memory = irecv[3]; 754 info->mallocs = irecv[4]; 755 } else if (flag == MAT_GLOBAL_SUM) { 756 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 757 758 info->nz_used = irecv[0]; 759 
info->nz_allocated = irecv[1]; 760 info->nz_unneeded = irecv[2]; 761 info->memory = irecv[3]; 762 info->mallocs = irecv[4]; 763 } 764 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 765 info->fill_ratio_needed = 0; 766 info->factor_mallocs = 0; 767 PetscFunctionReturn(0); 768 } 769 770 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 771 { 772 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 switch (op) { 777 case MAT_NEW_NONZERO_LOCATIONS: 778 case MAT_NEW_NONZERO_LOCATION_ERR: 779 case MAT_NEW_NONZERO_ALLOCATION_ERR: 780 MatCheckPreallocated(A,1); 781 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 782 break; 783 case MAT_ROW_ORIENTED: 784 MatCheckPreallocated(A,1); 785 a->roworiented = flg; 786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 787 break; 788 case MAT_NEW_DIAGONALS: 789 case MAT_KEEP_NONZERO_PATTERN: 790 case MAT_USE_HASH_TABLE: 791 case MAT_SORTED_FULL: 792 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 793 break; 794 case MAT_IGNORE_OFF_PROC_ENTRIES: 795 a->donotstash = flg; 796 break; 797 case MAT_SYMMETRIC: 798 case MAT_STRUCTURALLY_SYMMETRIC: 799 case MAT_HERMITIAN: 800 case MAT_SYMMETRY_ETERNAL: 801 case MAT_IGNORE_LOWER_TRIANGULAR: 802 case MAT_IGNORE_ZERO_ENTRIES: 803 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 804 break; 805 default: 806 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 812 { 813 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 814 const PetscScalar *l; 815 PetscScalar x,*v,*vv,*r; 816 PetscErrorCode ierr; 817 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 818 819 PetscFunctionBegin; 820 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 821 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 822 ierr = 
MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 823 if (ll) { 824 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 825 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 826 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 827 for (i=0; i<m; i++) { 828 x = l[i]; 829 v = vv + i; 830 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 831 } 832 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 833 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 834 } 835 if (rr) { 836 const PetscScalar *ar; 837 838 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 839 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 840 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 841 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 842 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 843 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 844 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 845 for (i=0; i<n; i++) { 846 x = r[i]; 847 v = vv + i*lda; 848 for (j=0; j<m; j++) (*v++) *= x; 849 } 850 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 851 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 852 } 853 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 858 { 859 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 860 PetscErrorCode ierr; 861 PetscInt i,j; 862 PetscMPIInt size; 863 PetscReal sum = 0.0; 864 const PetscScalar *av,*v; 865 866 PetscFunctionBegin; 867 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 868 v = av; 869 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 870 if (size == 1) { 871 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 872 } else { 873 if (type == NORM_FROBENIUS) { 874 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 875 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 876 } 
877 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 878 *nrm = PetscSqrtReal(*nrm); 879 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 880 } else if (type == NORM_1) { 881 PetscReal *tmp,*tmp2; 882 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 883 *nrm = 0.0; 884 v = av; 885 for (j=0; j<mdn->A->cmap->n; j++) { 886 for (i=0; i<mdn->A->rmap->n; i++) { 887 tmp[j] += PetscAbsScalar(*v); v++; 888 } 889 } 890 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 891 for (j=0; j<A->cmap->N; j++) { 892 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 893 } 894 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 895 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 896 } else if (type == NORM_INFINITY) { /* max row norm */ 897 PetscReal ntemp; 898 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 899 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 900 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 901 } 902 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 907 { 908 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 909 Mat B; 910 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 911 PetscErrorCode ierr; 912 PetscInt j,i,lda; 913 PetscScalar *v; 914 915 PetscFunctionBegin; 916 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 917 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 918 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 919 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 920 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 921 } else B = *matout; 922 923 m = a->A->rmap->n; n = a->A->cmap->n; 924 ierr = 
MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 925 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 926 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 927 for (i=0; i<m; i++) rwork[i] = rstart + i; 928 for (j=0; j<n; j++) { 929 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 930 v += lda; 931 } 932 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 933 ierr = PetscFree(rwork);CHKERRQ(ierr); 934 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 935 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 936 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 937 *matout = B; 938 } else { 939 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 940 } 941 PetscFunctionReturn(0); 942 } 943 944 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 945 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 946 947 PetscErrorCode MatSetUp_MPIDense(Mat A) 948 { 949 PetscErrorCode ierr; 950 951 PetscFunctionBegin; 952 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 953 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 954 if (!A->preallocated) { 955 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 956 } 957 PetscFunctionReturn(0); 958 } 959 960 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 961 { 962 PetscErrorCode ierr; 963 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 964 965 PetscFunctionBegin; 966 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatConjugate_MPIDense(Mat mat) 971 { 972 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 973 PetscErrorCode ierr; 974 975 PetscFunctionBegin; 976 ierr = MatConjugate(a->A);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 980 PetscErrorCode MatRealPart_MPIDense(Mat A) 981 { 982 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 991 { 992 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 993 PetscErrorCode ierr; 994 995 PetscFunctionBegin; 996 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 997 PetscFunctionReturn(0); 998 } 999 1000 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 1001 { 1002 PetscErrorCode ierr; 1003 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1004 1005 PetscFunctionBegin; 1006 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 1007 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 1008 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1013 1014 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1015 { 1016 PetscErrorCode ierr; 1017 PetscInt i,n; 1018 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1019 PetscReal *work; 1020 1021 PetscFunctionBegin; 1022 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1023 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1024 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1025 if (type == NORM_2) { 1026 for (i=0; i<n; i++) work[i] *= work[i]; 1027 } 1028 if (type == NORM_INFINITY) { 1029 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1030 } else { 1031 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(work);CHKERRQ(ierr); 1034 if (type == NORM_2) { 1035 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1036 } 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #if defined(PETSC_HAVE_CUDA) 1041 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1042 { 1043 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1044 
PetscErrorCode ierr; 1045 PetscInt lda; 1046 1047 PetscFunctionBegin; 1048 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1049 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1050 if (!a->cvec) { 1051 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1052 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1053 } 1054 a->vecinuse = col + 1; 1055 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1056 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1057 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1058 *v = a->cvec; 1059 PetscFunctionReturn(0); 1060 } 1061 1062 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1063 { 1064 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1065 PetscErrorCode ierr; 1066 1067 PetscFunctionBegin; 1068 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1069 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1070 a->vecinuse = 0; 1071 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1072 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1073 *v = NULL; 1074 PetscFunctionReturn(0); 1075 } 1076 1077 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1078 { 1079 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1080 PetscInt lda; 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1085 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1086 if 
(!a->cvec) { 1087 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1088 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1089 } 1090 a->vecinuse = col + 1; 1091 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1092 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1093 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1094 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1095 *v = a->cvec; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1100 { 1101 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1106 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1107 a->vecinuse = 0; 1108 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1109 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1110 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1111 *v = NULL; 1112 PetscFunctionReturn(0); 1113 } 1114 1115 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1116 { 1117 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1118 PetscErrorCode ierr; 1119 PetscInt lda; 1120 1121 PetscFunctionBegin; 1122 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1123 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1124 if (!a->cvec) { 1125 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1126 ierr = 
PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1127 } 1128 a->vecinuse = col + 1; 1129 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1130 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1131 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1132 *v = a->cvec; 1133 PetscFunctionReturn(0); 1134 } 1135 1136 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1137 { 1138 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1139 PetscErrorCode ierr; 1140 1141 PetscFunctionBegin; 1142 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1143 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1144 a->vecinuse = 0; 1145 ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1146 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1147 *v = NULL; 1148 PetscFunctionReturn(0); 1149 } 1150 1151 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1152 { 1153 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1158 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1159 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1164 { 1165 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1170 if (l->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1171 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1176 { 1177 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1178 PetscErrorCode ierr; 1179 1180 PetscFunctionBegin; 1181 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1182 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1183 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1188 { 1189 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1198 { 1199 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1208 { 1209 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1218 { 1219 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1220 PetscErrorCode ierr; 1221 1222 PetscFunctionBegin; 1223 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 
1226 1227 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1228 { 1229 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1230 PetscErrorCode ierr; 1231 1232 PetscFunctionBegin; 1233 ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1238 { 1239 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1248 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1249 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1250 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1251 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1252 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1253 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1254 1255 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1256 { 1257 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1262 if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1263 if (d->A) { 1264 ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1265 } 1266 mat->boundtocpu = bind; 1267 if (!bind) { 1268 PetscBool iscuda; 1269 1270 ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1271 if (!iscuda) { 1272 ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1273 } 1274 ierr = 
PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1275 if (!iscuda) { 1276 ierr = MatDestroy(&d->cmat);CHKERRQ(ierr); 1277 } 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1284 } else { 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1291 } 1292 if (d->cmat) { 1293 ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr); 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 PetscErrorCode 
MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1299 { 1300 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1301 PetscErrorCode ierr; 1302 PetscBool iscuda; 1303 1304 PetscFunctionBegin; 1305 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1306 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1307 if (!iscuda) PetscFunctionReturn(0); 1308 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1309 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1310 if (!d->A) { 1311 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1312 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1313 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1314 } 1315 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1316 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1317 A->preallocated = PETSC_TRUE; 1318 PetscFunctionReturn(0); 1319 } 1320 #endif 1321 1322 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1323 { 1324 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1333 { 1334 PetscFunctionBegin; 1335 *missing = PETSC_FALSE; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1340 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1341 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1342 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1343 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1344 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1345 1346 /* 
-------------------------------------------------------------------*/ 1347 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1348 MatGetRow_MPIDense, 1349 MatRestoreRow_MPIDense, 1350 MatMult_MPIDense, 1351 /* 4*/ MatMultAdd_MPIDense, 1352 MatMultTranspose_MPIDense, 1353 MatMultTransposeAdd_MPIDense, 1354 NULL, 1355 NULL, 1356 NULL, 1357 /* 10*/ NULL, 1358 NULL, 1359 NULL, 1360 NULL, 1361 MatTranspose_MPIDense, 1362 /* 15*/ MatGetInfo_MPIDense, 1363 MatEqual_MPIDense, 1364 MatGetDiagonal_MPIDense, 1365 MatDiagonalScale_MPIDense, 1366 MatNorm_MPIDense, 1367 /* 20*/ MatAssemblyBegin_MPIDense, 1368 MatAssemblyEnd_MPIDense, 1369 MatSetOption_MPIDense, 1370 MatZeroEntries_MPIDense, 1371 /* 24*/ MatZeroRows_MPIDense, 1372 NULL, 1373 NULL, 1374 NULL, 1375 NULL, 1376 /* 29*/ MatSetUp_MPIDense, 1377 NULL, 1378 NULL, 1379 MatGetDiagonalBlock_MPIDense, 1380 NULL, 1381 /* 34*/ MatDuplicate_MPIDense, 1382 NULL, 1383 NULL, 1384 NULL, 1385 NULL, 1386 /* 39*/ MatAXPY_MPIDense, 1387 MatCreateSubMatrices_MPIDense, 1388 NULL, 1389 MatGetValues_MPIDense, 1390 NULL, 1391 /* 44*/ NULL, 1392 MatScale_MPIDense, 1393 MatShift_Basic, 1394 NULL, 1395 NULL, 1396 /* 49*/ MatSetRandom_MPIDense, 1397 NULL, 1398 NULL, 1399 NULL, 1400 NULL, 1401 /* 54*/ NULL, 1402 NULL, 1403 NULL, 1404 NULL, 1405 NULL, 1406 /* 59*/ MatCreateSubMatrix_MPIDense, 1407 MatDestroy_MPIDense, 1408 MatView_MPIDense, 1409 NULL, 1410 NULL, 1411 /* 64*/ NULL, 1412 NULL, 1413 NULL, 1414 NULL, 1415 NULL, 1416 /* 69*/ NULL, 1417 NULL, 1418 NULL, 1419 NULL, 1420 NULL, 1421 /* 74*/ NULL, 1422 NULL, 1423 NULL, 1424 NULL, 1425 NULL, 1426 /* 79*/ NULL, 1427 NULL, 1428 NULL, 1429 NULL, 1430 /* 83*/ MatLoad_MPIDense, 1431 NULL, 1432 NULL, 1433 NULL, 1434 NULL, 1435 NULL, 1436 /* 89*/ NULL, 1437 NULL, 1438 NULL, 1439 NULL, 1440 NULL, 1441 /* 94*/ NULL, 1442 NULL, 1443 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1444 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1445 NULL, 1446 /* 99*/ MatProductSetFromOptions_MPIDense, 
1447 NULL, 1448 NULL, 1449 MatConjugate_MPIDense, 1450 NULL, 1451 /*104*/ NULL, 1452 MatRealPart_MPIDense, 1453 MatImaginaryPart_MPIDense, 1454 NULL, 1455 NULL, 1456 /*109*/ NULL, 1457 NULL, 1458 NULL, 1459 MatGetColumnVector_MPIDense, 1460 MatMissingDiagonal_MPIDense, 1461 /*114*/ NULL, 1462 NULL, 1463 NULL, 1464 NULL, 1465 NULL, 1466 /*119*/ NULL, 1467 NULL, 1468 NULL, 1469 NULL, 1470 NULL, 1471 /*124*/ NULL, 1472 MatGetColumnNorms_MPIDense, 1473 NULL, 1474 NULL, 1475 NULL, 1476 /*129*/ NULL, 1477 NULL, 1478 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1479 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1480 NULL, 1481 /*134*/ NULL, 1482 NULL, 1483 NULL, 1484 NULL, 1485 NULL, 1486 /*139*/ NULL, 1487 NULL, 1488 NULL, 1489 NULL, 1490 NULL, 1491 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1492 /*145*/ NULL, 1493 NULL, 1494 NULL 1495 }; 1496 1497 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1498 { 1499 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1500 PetscBool iscuda; 1501 PetscErrorCode ierr; 1502 1503 PetscFunctionBegin; 1504 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1505 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1506 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1507 if (!a->A) { 1508 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1509 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1510 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1511 } 1512 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1513 ierr = MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1514 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1515 mat->preallocated = PETSC_TRUE; 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #if defined(PETSC_HAVE_ELEMENTAL) 1520 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1521 { 1522 Mat mat_elemental; 1523 PetscErrorCode ierr; 1524 PetscScalar *v; 1525 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1526 1527 PetscFunctionBegin; 1528 if (reuse == MAT_REUSE_MATRIX) { 1529 mat_elemental = *newmat; 1530 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1531 } else { 1532 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1533 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1534 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1535 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1536 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1537 } 1538 1539 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1540 for (i=0; i<N; i++) cols[i] = i; 1541 for (i=0; i<m; i++) rows[i] = rstart + i; 1542 1543 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1544 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1545 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1546 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1547 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1548 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1549 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1550 1551 if (reuse == MAT_INPLACE_MATRIX) { 1552 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1553 } else { 1554 *newmat = mat_elemental; 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 #endif 1559 1560 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt 
col,PetscScalar **vals) 1561 { 1562 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1563 PetscErrorCode ierr; 1564 1565 PetscFunctionBegin; 1566 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1571 { 1572 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1573 PetscErrorCode ierr; 1574 1575 PetscFunctionBegin; 1576 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1581 { 1582 PetscErrorCode ierr; 1583 Mat_MPIDense *mat; 1584 PetscInt m,nloc,N; 1585 1586 PetscFunctionBegin; 1587 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1588 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1589 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1590 PetscInt sum; 1591 1592 if (n == PETSC_DECIDE) { 1593 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1594 } 1595 /* Check sum(n) = N */ 1596 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1597 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1598 1599 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1600 } 1601 1602 /* numeric phase */ 1603 mat = (Mat_MPIDense*)(*outmat)->data; 1604 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1605 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1606 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 #if defined(PETSC_HAVE_CUDA) 1611 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1612 { 1613 Mat B; 1614 Mat_MPIDense *m; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 if (reuse == MAT_INITIAL_MATRIX) { 1619 ierr = 
MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1620 } else if (reuse == MAT_REUSE_MATRIX) { 1621 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1622 } 1623 1624 B = *newmat; 1625 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1626 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1627 ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1628 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1630 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr); 1632 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1633 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1635 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1642 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) { /* convert the inner sequential matrix back to the CPU type and rebuild the scatter */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA: switch the default vector type to
   VECCUDA, register the CUDA array-access methods, and convert the inner sequential
   matrix to MATSEQDENSECUDA. Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and
   (implicitly, when reuse is MAT_INPLACE_MATRIX) in-place conversion of M itself. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) { /* values were copied above, so host and device are in sync after conversion */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Expose local column col of the parallel dense matrix as a parallel Vec (read/write access) */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1708 if (!a->cvec) { 1709 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1710 } 1711 a->vecinuse = col + 1; 1712 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1713 ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1714 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1715 *v = a->cvec; 1716 PetscFunctionReturn(0); 1717 } 1718 1719 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1720 { 1721 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1726 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1727 a->vecinuse = 0; 1728 ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1729 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1730 *v = NULL; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1735 { 1736 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1737 PetscErrorCode ierr; 1738 PetscInt lda; 1739 1740 PetscFunctionBegin; 1741 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1742 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1743 if (!a->cvec) { 1744 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1745 } 1746 a->vecinuse = col + 1; 1747 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1748 ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1749 ierr = VecPlaceArray(a->cvec,a->ptrinuse + 
(size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); /* forbid writes through the read-only view */
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the read-only column vector obtained with MatDenseGetColumnVecRead_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);  /* undo the lock pushed in the Get call */
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Expose local column col as a parallel Vec with write-only access */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the write-only column vector obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Expose the contiguous column range [cbegin,cend) as a parallel dense matrix that
   shares storage with A. The wrapper a->cmat is created lazily and reused. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) { /* first use: create the wrapper, sharing A's row layout */
    setup = PETSC_TRUE;
    ierr  = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr  = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr  = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr  = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr  = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr  = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) { /* column count changed: rebuild the column layout */
    setup = PETSC_TRUE;
    ierr  = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr  = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr  = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr  = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse           = cbegin + 1; /* offset by one so cbegin==0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Release the submatrix obtained with MatDenseGetSubMatrix_MPIDense() */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
.
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense()

M*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* register the dense-specific methods resolved at runtime through PetscObjectComposeFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  /* build the host type first, then convert in place to the CUDA type */
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the placed array lives on the host */
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the replacement array lives on the host */
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the placed array lives on the device */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the replacement array lives on the device */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU; /* the device copy is now the authoritative one */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU.
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2277 2278 Level: developer 2279 2280 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2281 @*/ 2282 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2283 { 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2288 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 /*@C 2293 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2294 2295 Not Collective 2296 2297 Input Parameters: 2298 + A - the matrix 2299 - array - the GPU array in column major order 2300 2301 Notes: 2302 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2303 2304 Level: developer 2305 2306 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2307 @*/ 2308 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2309 { 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 /*@C 2318 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2319 2320 Not Collective 2321 2322 Input Parameters: 2323 . A - the matrix 2324 2325 Output Parameters 2326 . array - the GPU array in column major order 2327 2328 Notes: 2329 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU; /* the device copy is now the authoritative one */
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* ranks without local rows may legitimately pass data=NULL: agree globally so the assembly state is collective */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6); /* data must be given (or not) consistently on all ranks */
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicate a parallel dense matrix, optionally copying its values (cpvalues) */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the inner sequential matrix and rebuild the multiply scatter */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a parallel dense matrix from a binary or (if available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Compare two parallel dense matrices: local comparison of the inner sequential
   matrices followed by a global logical AND reduction */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free the private data attached to an A^T*B (transpose-matrix-multiply) product */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free the private data attached to an A*B^T (matrix-transpose-multiply) product */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data)
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2595 atb = (Mat_TransMatMultDense *)C->product->data; 2596 recvcounts = atb->recvcounts; 2597 sendbuf = atb->sendbuf; 2598 2599 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2601 2602 /* compute atbarray = aseq^T * bseq */ 2603 ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2604 2605 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2606 2607 /* arrange atbarray into sendbuf */ 2608 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2609 for (proc=0, k=0; proc<size; proc++) { 2610 for (j=0; j<cN; j++) { 2611 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2612 } 2613 } 2614 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2615 2616 /* sum all atbarray to local values of C */ 2617 ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 2618 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 2619 ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 2620 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2621 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2622 PetscFunctionReturn(0); 2623 } 2624 2625 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2626 { 2627 PetscErrorCode ierr; 2628 MPI_Comm comm; 2629 PetscMPIInt size; 2630 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2631 Mat_TransMatMultDense *atb; 2632 PetscBool cisdense; 2633 PetscInt i; 2634 const PetscInt *ranges; 2635 2636 PetscFunctionBegin; 2637 MatCheckProduct(C,3); 2638 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2639 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2640 if (A->rmap->rstart != B->rmap->rstart || 
A->rmap->rend != B->rmap->rend) { 2641 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2642 } 2643 2644 /* create matrix product C */ 2645 ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2646 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 2647 if (!cisdense) { 2648 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2649 } 2650 ierr = MatSetUp(C);CHKERRQ(ierr); 2651 2652 /* create data structure for reuse C */ 2653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2654 ierr = PetscNew(&atb);CHKERRQ(ierr); 2655 cM = C->rmap->N; 2656 ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2657 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2658 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2659 2660 C->product->data = atb; 2661 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2662 PetscFunctionReturn(0); 2663 } 2664 2665 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2666 { 2667 PetscErrorCode ierr; 2668 MPI_Comm comm; 2669 PetscMPIInt i, size; 2670 PetscInt maxRows, bufsiz; 2671 PetscMPIInt tag; 2672 PetscInt alg; 2673 Mat_MatTransMultDense *abt; 2674 Mat_Product *product = C->product; 2675 PetscBool flg; 2676 2677 PetscFunctionBegin; 2678 MatCheckProduct(C,4); 2679 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2680 /* check local size of A and B */ 2681 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2682 2683 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2684 alg = flg ? 
0 : 1; 2685 2686 /* setup matrix product C */ 2687 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2688 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2689 ierr = MatSetUp(C);CHKERRQ(ierr); 2690 ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 2691 2692 /* create data structure for reuse C */ 2693 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2694 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2695 ierr = PetscNew(&abt);CHKERRQ(ierr); 2696 abt->tag = tag; 2697 abt->alg = alg; 2698 switch (alg) { 2699 case 1: /* alg: "cyclic" */ 2700 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2701 bufsiz = A->cmap->N * maxRows; 2702 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2703 break; 2704 default: /* alg: "allgatherv" */ 2705 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2706 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2707 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2708 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2709 break; 2710 } 2711 2712 C->product->data = abt; 2713 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2714 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2715 PetscFunctionReturn(0); 2716 } 2717 2718 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2719 { 2720 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2721 Mat_MatTransMultDense *abt; 2722 PetscErrorCode ierr; 2723 MPI_Comm comm; 2724 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2725 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2726 PetscInt i,cK=A->cmap->N,k,j,bn; 2727 
PetscScalar _DOne=1.0,_DZero=0.0; 2728 const PetscScalar *av,*bv; 2729 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2730 MPI_Request reqs[2]; 2731 const PetscInt *ranges; 2732 2733 PetscFunctionBegin; 2734 MatCheckProduct(C,3); 2735 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2736 abt = (Mat_MatTransMultDense*)C->product->data; 2737 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2738 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2739 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2740 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2741 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2742 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2743 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2744 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2745 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2747 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2749 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2750 bn = B->rmap->n; 2751 if (blda == bn) { 2752 sendbuf = (PetscScalar*)bv; 2753 } else { 2754 sendbuf = abt->buf[0]; 2755 for (k = 0, i = 0; i < cK; i++) { 2756 for (j = 0; j < bn; j++, k++) { 2757 sendbuf[k] = bv[i * blda + j]; 2758 } 2759 } 2760 } 2761 if (size > 1) { 2762 sendto = (rank + size - 1) % size; 2763 recvfrom = (rank + size + 1) % size; 2764 } else { 2765 sendto = recvfrom = 0; 2766 } 2767 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2768 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2769 recvisfrom = rank; 2770 for (i = 0; i < size; i++) { 2771 /* we have finished receiving in sending, bufs can be read/modified */ 2772 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2773 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2774 2775 if (nextrecvisfrom != rank) { 2776 /* 
start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2777 sendsiz = cK * bn; 2778 recvsiz = cK * nextbn; 2779 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2780 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2781 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2782 } 2783 2784 /* local aseq * sendbuf^T */ 2785 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2786 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2787 2788 if (nextrecvisfrom != rank) { 2789 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2790 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2791 } 2792 bn = nextbn; 2793 recvisfrom = nextrecvisfrom; 2794 sendbuf = recvbuf; 2795 } 2796 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2797 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2798 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2799 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2800 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2805 { 2806 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2807 Mat_MatTransMultDense *abt; 2808 PetscErrorCode ierr; 2809 MPI_Comm comm; 2810 PetscMPIInt size; 2811 PetscScalar *cv, *sendbuf, *recvbuf; 2812 const PetscScalar *av,*bv; 2813 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2814 PetscScalar _DOne=1.0,_DZero=0.0; 2815 PetscBLASInt cm, cn, ck, alda, clda; 2816 2817 PetscFunctionBegin; 2818 MatCheckProduct(C,3); 2819 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data 
empty"); 2820 abt = (Mat_MatTransMultDense*)C->product->data; 2821 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2822 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2823 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2824 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2825 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2826 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2827 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2828 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2829 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2830 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2831 /* copy transpose of B into buf[0] */ 2832 bn = B->rmap->n; 2833 sendbuf = abt->buf[0]; 2834 recvbuf = abt->buf[1]; 2835 for (k = 0, j = 0; j < bn; j++) { 2836 for (i = 0; i < cK; i++, k++) { 2837 sendbuf[k] = bv[i * blda + j]; 2838 } 2839 } 2840 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2841 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2842 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2843 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2844 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2845 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2846 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2847 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2848 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2849 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2850 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2855 { 2856 Mat_MatTransMultDense *abt; 2857 PetscErrorCode ierr; 2858 2859 PetscFunctionBegin; 2860 MatCheckProduct(C,3); 2861 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2862 abt = (Mat_MatTransMultDense*)C->product->data; 2863 switch (abt->alg) { 2864 case 1: 2865 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2866 break; 2867 default: 2868 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2869 break; 2870 } 2871 PetscFunctionReturn(0); 2872 } 2873 2874 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2875 { 2876 PetscErrorCode ierr; 2877 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2878 2879 PetscFunctionBegin; 2880 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2881 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2882 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2883 ierr = PetscFree(ab);CHKERRQ(ierr); 2884 PetscFunctionReturn(0); 2885 } 2886 2887 #if defined(PETSC_HAVE_ELEMENTAL) 2888 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2889 { 2890 PetscErrorCode ierr; 2891 Mat_MatMultDense *ab; 2892 2893 PetscFunctionBegin; 2894 MatCheckProduct(C,3); 2895 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2896 ab = (Mat_MatMultDense*)C->product->data; 2897 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2898 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2899 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2900 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2901 PetscFunctionReturn(0); 2902 } 2903 2904 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2905 { 2906 PetscErrorCode ierr; 2907 Mat Ae,Be,Ce; 2908 Mat_MatMultDense *ab; 2909 2910 PetscFunctionBegin; 2911 MatCheckProduct(C,4); 2912 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2913 /* check local size of A 
and B */ 2914 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2915 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2916 } 2917 2918 /* create elemental matrices Ae and Be */ 2919 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2920 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2921 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2922 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2923 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2924 2925 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2926 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2927 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2928 ierr = MatSetUp(Be);CHKERRQ(ierr); 2929 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2930 2931 /* compute symbolic Ce = Ae*Be */ 2932 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2933 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2934 2935 /* setup C */ 2936 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2937 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2938 ierr = MatSetUp(C);CHKERRQ(ierr); 2939 2940 /* create data structure for reuse Cdense */ 2941 ierr = PetscNew(&ab);CHKERRQ(ierr); 2942 ab->Ae = Ae; 2943 ab->Be = Be; 2944 ab->Ce = Ce; 2945 2946 C->product->data = ab; 2947 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2948 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2949 PetscFunctionReturn(0); 2950 } 2951 #endif 2952 /* ----------------------------------------------- */ 2953 #if defined(PETSC_HAVE_ELEMENTAL) 2954 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2955 { 2956 PetscFunctionBegin; 2957 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2958 C->ops->productsymbolic = MatProductSymbolic_AB; 2959 PetscFunctionReturn(0); 2960 } 2961 #endif 2962 2963 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2964 { 2965 Mat_Product *product = C->product; 2966 Mat A = product->A,B=product->B; 2967 2968 PetscFunctionBegin; 2969 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2970 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2971 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2972 C->ops->productsymbolic = MatProductSymbolic_AtB; 2973 PetscFunctionReturn(0); 2974 } 2975 2976 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2977 { 2978 PetscErrorCode ierr; 2979 Mat_Product *product = C->product; 2980 const char *algTypes[2] = {"allgatherv","cyclic"}; 2981 PetscInt alg,nalg = 2; 2982 PetscBool flg = PETSC_FALSE; 2983 2984 PetscFunctionBegin; 2985 /* Set default algorithm */ 2986 alg = 0; /* default is allgatherv */ 2987 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2988 if (flg) { 2989 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2990 } 2991 2992 /* Get runtime option */ 2993 if (product->api_user) { 2994 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2995 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2996 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2997 } else { 2998 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2999 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic 
approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 3000 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3001 } 3002 if (flg) { 3003 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 3004 } 3005 3006 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 3007 C->ops->productsymbolic = MatProductSymbolic_ABt; 3008 PetscFunctionReturn(0); 3009 } 3010 3011 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 3012 { 3013 PetscErrorCode ierr; 3014 Mat_Product *product = C->product; 3015 3016 PetscFunctionBegin; 3017 switch (product->type) { 3018 #if defined(PETSC_HAVE_ELEMENTAL) 3019 case MATPRODUCT_AB: 3020 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 3021 break; 3022 #endif 3023 case MATPRODUCT_AtB: 3024 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 3025 break; 3026 case MATPRODUCT_ABt: 3027 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 3028 break; 3029 default: 3030 break; 3031 } 3032 PetscFunctionReturn(0); 3033 } 3034