1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else { 36 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); 37 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 47 PetscErrorCode ierr; 48 49 PetscFunctionBegin; 50 ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 57 PetscErrorCode ierr; 58 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 59 60 PetscFunctionBegin; 61 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 62 lrow = row - rstart; 63 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 68 { 69 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 70 PetscErrorCode ierr; 71 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 72 73 PetscFunctionBegin; 74 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 75 lrow = row - rstart; 76 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 81 { 82 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 83 PetscErrorCode ierr; 84 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 85 PetscScalar *array; 86 MPI_Comm comm; 87 PetscBool flg; 88 Mat B; 89 90 PetscFunctionBegin; 91 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 92 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 93 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 94 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 95 96 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 97 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 98 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 99 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 100 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 101 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 102 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 103 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 104 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 105 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 106 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 107 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 108 *a = B; 109 ierr = MatDestroy(&B);CHKERRQ(ierr); 110 } else *a = B; 111 PetscFunctionReturn(0); 112 } 113 114 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 115 { 116 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 117 PetscErrorCode ierr; 118 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 119 PetscBool roworiented = A->roworiented; 120 121 PetscFunctionBegin; 122 for (i=0; i<m; i++) { 123 if (idxm[i] < 0) continue; 124 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 125 if (idxm[i] >= rstart && idxm[i] < rend) { 126 row = idxm[i] - rstart; 127 if (roworiented) { 128 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 129 } else { 130 for (j=0; j<n; j++) { 131 if (idxn[j] < 0) continue; 132 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 133 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 134 } 135 } 136 } else if (!A->donotstash) { 137 mat->assembled = PETSC_FALSE; 138 if (roworiented) { 139 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 140 } else { 141 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 142 } 143 } 144 } 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 149 { 150 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 151 PetscErrorCode ierr; 152 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 153 154 PetscFunctionBegin; 155 for (i=0; i<m; i++) { 156 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 157 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 158 if (idxm[i] >= rstart && idxm[i] < rend) { 159 row = idxm[i] - rstart; 160 for (j=0; j<n; j++) { 161 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 162 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 163 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 164 } 165 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 166 } 167 PetscFunctionReturn(0); 168 } 169 170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 171 { 172 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 181 { 182 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 183 PetscBool iscuda; 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 if (!a->A) { 188 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 189 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 190 ierr = 
PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 191 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 192 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 193 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 194 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 195 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 196 } 197 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 219 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 230 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 235 { 236 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 if (a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 241 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 242 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 247 { 248 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 249 PetscErrorCode ierr; 250 251 PetscFunctionBegin; 252 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 253 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 254 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 259 { 260 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 265 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 266 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 271 { 272 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 273 PetscErrorCode ierr; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 284 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 285 if (comm_mat != comm_is) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 288 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 289 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 290 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 291 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 292 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! */ 297 298 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! 
*/ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 309 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 310 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 311 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 317 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 318 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 326 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 332 /* Free work space */ 333 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 334 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 335 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 360 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 371 { 372 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 373 PetscErrorCode ierr; 374 PetscInt nstash,reallocs; 375 376 PetscFunctionBegin; 377 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 378 379 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 380 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 381 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 386 { 387 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 388 PetscErrorCode ierr; 389 PetscInt i,*row,*col,flg,j,rstart,ncols; 390 PetscMPIInt n; 391 PetscScalar *val; 392 393 PetscFunctionBegin; 394 if (!mdn->donotstash && !mat->nooffprocentries) { 395 /* wait on receives */ 396 while (1) { 397 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 398 if (!flg) break; 399 400 for (i=0; i<n;) { 401 /* Now identify the consecutive vals belonging to the same row */ 402 for (j=i,rstart=row[j]; j<n; j++) { 403 if (row[j] != rstart) break; 404 } 405 if (j < n) ncols = j-i; 406 else ncols = n-i; 407 /* Now assemble all these values with a single function call */ 408 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 409 i = j; 410 } 411 } 412 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 413 } 414 415 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 417 418 if (!mat->was_assembled && mode == 
MAT_FINAL_ASSEMBLY) { 419 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 425 { 426 PetscErrorCode ierr; 427 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 428 429 PetscFunctionBegin; 430 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 437 PetscErrorCode ierr; 438 PetscInt i,len,*lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 443 /* fix right hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 449 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 450 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 451 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 452 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 453 } 454 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 455 if (diag != 0.0) { 456 Vec d; 457 458 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 459 ierr = VecSet(d,diag);CHKERRQ(ierr); 460 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecDestroy(&d);CHKERRQ(ierr); 462 } 463 ierr = PetscFree(lrows);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 const PetscScalar *ax; 477 PetscScalar *ay; 478 PetscMemType axmtype,aymtype; 479 
480 PetscFunctionBegin; 481 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 482 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 483 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr); 484 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 485 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 486 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 487 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 494 PetscErrorCode ierr; 495 const PetscScalar *ax; 496 PetscScalar *ay; 497 PetscMemType axmtype,aymtype; 498 499 PetscFunctionBegin; 500 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 501 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 502 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr); 503 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 504 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 505 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 506 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 513 PetscErrorCode ierr; 514 const PetscScalar *ax; 515 PetscScalar *ay; 516 PetscMemType axmtype,aymtype; 517 518 PetscFunctionBegin; 519 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 520 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 522 ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr); 523 ierr = 
PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 524 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 525 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 526 ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 PetscErrorCode ierr; 534 const PetscScalar *ax; 535 PetscScalar *ay; 536 PetscMemType axmtype,aymtype; 537 538 PetscFunctionBegin; 539 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 540 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 541 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 542 ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr); 543 ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 544 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 545 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 546 ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr); 547 PetscFunctionReturn(0); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 553 PetscErrorCode ierr; 554 PetscInt lda,len,i,n,m = A->rmap->n,radd; 555 PetscScalar *x,zero = 0.0; 556 const PetscScalar *av; 557 558 PetscFunctionBegin; 559 ierr = VecSet(v,zero);CHKERRQ(ierr); 560 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 561 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 562 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 563 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 564 radd = A->rmap->rstart*m; 565 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 566 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 567 for (i=0; i<len; i++) { 568 x[i] = av[radd + i*lda + i]; 569 } 570 ierr = 
MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 571 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatDestroy_MPIDense(Mat mat) 576 { 577 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 578 PetscErrorCode ierr; 579 580 PetscFunctionBegin; 581 #if defined(PETSC_USE_LOG) 582 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 583 #endif 584 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 585 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 586 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 587 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 588 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 589 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 590 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 591 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 592 593 ierr = PetscFree(mat->data);CHKERRQ(ierr); 594 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 595 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 604 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 607 #if defined(PETSC_HAVE_ELEMENTAL) 608 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 609 #endif 610 #if defined(PETSC_HAVE_SCALAPACK) 611 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 612 #endif 613 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 614 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 615 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 616 #if defined (PETSC_HAVE_CUDA) 617 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 618 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 619 #endif 620 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 621 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 622 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 623 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 624 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 625 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 626 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 627 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 628 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 634 635 #include <petscdraw.h> 636 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 637 { 638 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 639 PetscErrorCode ierr; 640 PetscMPIInt rank; 641 PetscViewerType vtype; 642 PetscBool iascii,isdraw; 643 PetscViewer sviewer; 644 PetscViewerFormat format; 645 646 PetscFunctionBegin; 647 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 648 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 649 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 650 if (iascii) { 651 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 652 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 653 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 654 MatInfo info; 655 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 656 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 657 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 658 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 660 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } else if (format == PETSC_VIEWER_ASCII_INFO) { 663 PetscFunctionReturn(0); 664 } 665 } else if (isdraw) { 666 PetscDraw draw; 667 PetscBool isnull; 668 669 ierr = 
PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 670 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 671 if (isnull) PetscFunctionReturn(0); 672 } 673 674 { 675 /* assemble the entire matrix onto first processor. */ 676 Mat A; 677 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 678 PetscInt *cols; 679 PetscScalar *vals; 680 681 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 682 if (!rank) { 683 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 684 } else { 685 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 686 } 687 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 688 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 689 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 690 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 691 692 /* Copy the matrix ... This isn't the most efficient means, 693 but it's quick for now */ 694 A->insertmode = INSERT_VALUES; 695 696 row = mat->rmap->rstart; 697 m = mdn->A->rmap->n; 698 for (i=0; i<m; i++) { 699 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 700 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 702 row++; 703 } 704 705 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 707 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 708 if (!rank) { 709 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 710 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 711 } 712 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 713 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 714 ierr = MatDestroy(&A);CHKERRQ(ierr); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 720 { 721 PetscErrorCode ierr; 722 PetscBool iascii,isbinary,isdraw,issocket; 723 724 PetscFunctionBegin; 725 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 726 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 727 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 728 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 729 730 if (iascii || issocket || isdraw) { 731 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 732 } else if (isbinary) { 733 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 734 } 735 PetscFunctionReturn(0); 736 } 737 738 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 739 { 740 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 741 Mat mdn = mat->A; 742 PetscErrorCode ierr; 743 PetscLogDouble isend[5],irecv[5]; 744 745 PetscFunctionBegin; 746 info->block_size = 1.0; 747 748 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 749 750 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 751 isend[3] = info->memory; isend[4] = info->mallocs; 752 if (flag == MAT_LOCAL) { 753 info->nz_used = isend[0]; 754 info->nz_allocated = isend[1]; 755 info->nz_unneeded = isend[2]; 756 info->memory = isend[3]; 757 info->mallocs = isend[4]; 758 } else if (flag == MAT_GLOBAL_MAX) { 759 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 760 761 info->nz_used = irecv[0]; 762 info->nz_allocated = irecv[1]; 763 info->nz_unneeded = irecv[2]; 764 info->memory = irecv[3]; 765 info->mallocs = irecv[4]; 766 } else if (flag == MAT_GLOBAL_SUM) { 767 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 768 769 info->nz_used = irecv[0]; 770 
info->nz_allocated = irecv[1]; 771 info->nz_unneeded = irecv[2]; 772 info->memory = irecv[3]; 773 info->mallocs = irecv[4]; 774 } 775 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 776 info->fill_ratio_needed = 0; 777 info->factor_mallocs = 0; 778 PetscFunctionReturn(0); 779 } 780 781 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 782 { 783 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 784 PetscErrorCode ierr; 785 786 PetscFunctionBegin; 787 switch (op) { 788 case MAT_NEW_NONZERO_LOCATIONS: 789 case MAT_NEW_NONZERO_LOCATION_ERR: 790 case MAT_NEW_NONZERO_ALLOCATION_ERR: 791 MatCheckPreallocated(A,1); 792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 793 break; 794 case MAT_ROW_ORIENTED: 795 MatCheckPreallocated(A,1); 796 a->roworiented = flg; 797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 798 break; 799 case MAT_FORCE_DIAGONAL_ENTRIES: 800 case MAT_KEEP_NONZERO_PATTERN: 801 case MAT_USE_HASH_TABLE: 802 case MAT_SORTED_FULL: 803 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 804 break; 805 case MAT_IGNORE_OFF_PROC_ENTRIES: 806 a->donotstash = flg; 807 break; 808 case MAT_SYMMETRIC: 809 case MAT_STRUCTURALLY_SYMMETRIC: 810 case MAT_HERMITIAN: 811 case MAT_SYMMETRY_ETERNAL: 812 case MAT_IGNORE_LOWER_TRIANGULAR: 813 case MAT_IGNORE_ZERO_ENTRIES: 814 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 815 break; 816 default: 817 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 818 } 819 PetscFunctionReturn(0); 820 } 821 822 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 823 { 824 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 825 const PetscScalar *l; 826 PetscScalar x,*v,*vv,*r; 827 PetscErrorCode ierr; 828 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 829 830 PetscFunctionBegin; 831 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 832 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 833 ierr = 
MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 834 if (ll) { 835 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 836 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 837 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 838 for (i=0; i<m; i++) { 839 x = l[i]; 840 v = vv + i; 841 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 842 } 843 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 844 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 845 } 846 if (rr) { 847 const PetscScalar *ar; 848 849 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 850 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 851 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 852 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 853 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 854 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 855 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 856 for (i=0; i<n; i++) { 857 x = r[i]; 858 v = vv + i*lda; 859 for (j=0; j<m; j++) (*v++) *= x; 860 } 861 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 862 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 863 } 864 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 869 { 870 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 871 PetscErrorCode ierr; 872 PetscInt i,j; 873 PetscMPIInt size; 874 PetscReal sum = 0.0; 875 const PetscScalar *av,*v; 876 877 PetscFunctionBegin; 878 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 879 v = av; 880 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 881 if (size == 1) { 882 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 883 } else { 884 if (type == NORM_FROBENIUS) { 885 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 886 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 887 } 
888 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 889 *nrm = PetscSqrtReal(*nrm); 890 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 891 } else if (type == NORM_1) { 892 PetscReal *tmp,*tmp2; 893 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 894 *nrm = 0.0; 895 v = av; 896 for (j=0; j<mdn->A->cmap->n; j++) { 897 for (i=0; i<mdn->A->rmap->n; i++) { 898 tmp[j] += PetscAbsScalar(*v); v++; 899 } 900 } 901 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 902 for (j=0; j<A->cmap->N; j++) { 903 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 904 } 905 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 906 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 907 } else if (type == NORM_INFINITY) { /* max row norm */ 908 PetscReal ntemp; 909 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 910 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 911 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 912 } 913 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 914 PetscFunctionReturn(0); 915 } 916 917 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 918 { 919 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 920 Mat B; 921 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 922 PetscErrorCode ierr; 923 PetscInt j,i,lda; 924 PetscScalar *v; 925 926 PetscFunctionBegin; 927 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 928 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 929 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 930 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 931 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 932 } else B = *matout; 933 934 m = a->A->rmap->n; n = a->A->cmap->n; 935 ierr = 
MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 936 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 937 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 938 for (i=0; i<m; i++) rwork[i] = rstart + i; 939 for (j=0; j<n; j++) { 940 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 941 v += lda; 942 } 943 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 944 ierr = PetscFree(rwork);CHKERRQ(ierr); 945 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 946 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 947 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 948 *matout = B; 949 } else { 950 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 956 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 957 958 PetscErrorCode MatSetUp_MPIDense(Mat A) 959 { 960 PetscErrorCode ierr; 961 962 PetscFunctionBegin; 963 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 964 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 965 if (!A->preallocated) { 966 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 967 } 968 PetscFunctionReturn(0); 969 } 970 971 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 972 { 973 PetscErrorCode ierr; 974 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 975 976 PetscFunctionBegin; 977 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatConjugate_MPIDense(Mat mat) 982 { 983 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 984 PetscErrorCode ierr; 985 986 PetscFunctionBegin; 987 ierr = MatConjugate(a->A);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatRealPart_MPIDense(Mat A) 992 { 993 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1002 { 1003 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 1012 { 1013 PetscErrorCode ierr; 1014 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1015 1016 PetscFunctionBegin; 1017 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 1018 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 1019 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1024 1025 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1026 { 1027 PetscErrorCode ierr; 1028 PetscInt i,n; 1029 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1030 PetscReal *work; 1031 1032 PetscFunctionBegin; 1033 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1034 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1035 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1036 if (type == NORM_2) { 1037 for (i=0; i<n; i++) work[i] *= work[i]; 1038 } 1039 if (type == NORM_INFINITY) { 1040 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1041 } else { 1042 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1043 } 1044 ierr = PetscFree(work);CHKERRQ(ierr); 1045 if (type == NORM_2) { 1046 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #if defined(PETSC_HAVE_CUDA) 1052 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1053 { 1054 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 
1055 PetscErrorCode ierr; 1056 PetscInt lda; 1057 1058 PetscFunctionBegin; 1059 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1060 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1061 if (!a->cvec) { 1062 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1063 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1064 } 1065 a->vecinuse = col + 1; 1066 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1067 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1068 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1069 *v = a->cvec; 1070 PetscFunctionReturn(0); 1071 } 1072 1073 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1074 { 1075 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1076 PetscErrorCode ierr; 1077 1078 PetscFunctionBegin; 1079 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1080 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1081 a->vecinuse = 0; 1082 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1083 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1084 *v = NULL; 1085 PetscFunctionReturn(0); 1086 } 1087 1088 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1089 { 1090 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1091 PetscInt lda; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1096 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1097 
if (!a->cvec) { 1098 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1099 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1100 } 1101 a->vecinuse = col + 1; 1102 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1103 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1104 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1105 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1106 *v = a->cvec; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1111 { 1112 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1117 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1118 a->vecinuse = 0; 1119 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1120 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1121 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1122 *v = NULL; 1123 PetscFunctionReturn(0); 1124 } 1125 1126 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1127 { 1128 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1129 PetscErrorCode ierr; 1130 PetscInt lda; 1131 1132 PetscFunctionBegin; 1133 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1134 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1135 if (!a->cvec) { 1136 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1137 ierr = 
PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1138 } 1139 a->vecinuse = col + 1; 1140 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1141 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1142 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1143 *v = a->cvec; 1144 PetscFunctionReturn(0); 1145 } 1146 1147 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1148 { 1149 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1154 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1155 a->vecinuse = 0; 1156 ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1157 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1158 *v = NULL; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1163 { 1164 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1165 PetscErrorCode ierr; 1166 1167 PetscFunctionBegin; 1168 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1169 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1170 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1175 { 1176 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1181 if (l->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1182 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1187 { 1188 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1193 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1194 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1199 { 1200 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1209 { 1210 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1219 { 1220 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1229 { 1230 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 
static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to the local SeqDenseCUDA block */
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* CPU counterparts used by MatBindToCPU_MPIDenseCUDA when binding to host */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/* Switch the matrix (and its cached column vector / submatrix) between host
   and device execution by re-composing the column-vector accessors. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* cached helpers of the wrong memory type must be recreated lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocate the local SeqDenseCUDA block, optionally wrapping user-provided
   device memory d_data; silently no-ops for non-CUDA matrices. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    /* local block holds all N global columns for the local rows */
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Dense storage always has every diagonal entry present. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Operation table for MATMPIDENSE; entries are positional (slot index in
   struct _MatOps), so the order must not be changed. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnNorms_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};

/* Create (or retype) the local sequential dense block; data, if non-NULL,
   is user memory used as the local array. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* local block type follows the parallel type (CUDA vs host) */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Build (symbolic phase) or fill (numeric phase) a parallel dense matrix whose
   local block on each rank is a copy of inmat; n is the local column count
   (PETSC_DECIDE lets PetscSplitOwnership choose it). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Downgrade a MATMPIDENSECUDA to plain MATMPIDENSE: bind to CPU, restore the
   host vector type, uncompose every CUDA-specific method, and convert the
   local block back to SEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove CUDA-only composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Upgrade a MATMPIDENSE to MATMPIDENSECUDA: compose the CUDA methods and
   switch the default vector type to CUDA.  (Continues beyond this chunk.) */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1692 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1694 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1695 m = (Mat_MPIDense*)(B)->data; 1696 if (m->A) { 1697 ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1698 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1699 B->offloadmask = PETSC_OFFLOAD_BOTH; 1700 } else { 1701 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1702 } 1703 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1704 1705 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1706 PetscFunctionReturn(0); 1707 } 1708 #endif 1709 1710 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1711 { 1712 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1713 PetscErrorCode ierr; 1714 PetscInt lda; 1715 1716 PetscFunctionBegin; 1717 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1718 if (a->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create a work vector wrapping (no copy) one column of the local array */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Ends access to the column Vec obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); the returned Vec is read-locked */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Ends access obtained with MatDenseGetColumnVecRead_MPIDense(); pops the read lock */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Ends access obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Exposes columns [cbegin,cend) as a parallel dense matrix sharing the local storage;
   must be matched by MatDenseRestoreSubMatrix_MPIDense() */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* lazily create the cached wrapper matrix; row layout is shared with A */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* column count changed since the last call: rebuild the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* 0 means "not in use", hence the +1 */
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Ends access to the submatrix obtained with MatDenseGetSubMatrix_MPIDense() */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
.
 -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateDense()

M*/
/* Type constructor for MATMPIDENSE: installs the operations table and composes the
   dense-specific methods that the generic MatDenseXXX() entry points dispatch to */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

   Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: builds a MATMPIDENSE then converts it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* TryMethod: silently a no-op if B is not a type that composes this method */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so any GPU copy is now stale */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so any GPU copy is now stale */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array lives on the GPU, so any host copy is now stale */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* GPU now holds the authoritative data */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU.
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2288 2289 Level: developer 2290 2291 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2292 @*/ 2293 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2294 { 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2299 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 /*@C 2304 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2305 2306 Not Collective 2307 2308 Input Parameters: 2309 + A - the matrix 2310 - array - the GPU array in column major order 2311 2312 Notes: 2313 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2314 2315 Level: developer 2316 2317 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2318 @*/ 2319 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2320 { 2321 PetscErrorCode ierr; 2322 2323 PetscFunctionBegin; 2324 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 /*@C 2329 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2330 2331 Not Collective 2332 2333 Input Parameters: 2334 . A - the matrix 2335 2336 Output Parameters 2337 . array - the GPU array in column major order 2338 2339 Notes: 2340 Data can be copied to the GPU due to operations done on the CPU. 
 If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* GPU now holds the authoritative data */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.
 M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* collectively decide: if any rank supplied data, skip the assembly phase everywhere */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* data must be collectively NULL or collectively non-NULL; enforced here instead of an allreduce */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicates a parallel dense matrix, sharing layouts and duplicating the local block
   (values copied or not according to cpvalues) */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads a parallel dense matrix from a binary or (if available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,&ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s
matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2549 PetscFunctionReturn(0); 2550 } 2551 2552 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2553 { 2554 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2555 Mat a,b; 2556 PetscBool flg; 2557 PetscErrorCode ierr; 2558 2559 PetscFunctionBegin; 2560 a = matA->A; 2561 b = matB->A; 2562 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2563 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2568 { 2569 PetscErrorCode ierr; 2570 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2571 2572 PetscFunctionBegin; 2573 ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2574 ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2575 ierr = PetscFree(atb);CHKERRQ(ierr); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2580 { 2581 PetscErrorCode ierr; 2582 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2583 2584 PetscFunctionBegin; 2585 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2586 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2587 ierr = PetscFree(abt);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2592 { 2593 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2594 Mat_TransMatMultDense *atb; 2595 PetscErrorCode ierr; 2596 MPI_Comm comm; 2597 PetscMPIInt size,*recvcounts; 2598 PetscScalar *carray,*sendbuf; 2599 const PetscScalar *atbarray; 2600 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2601 const PetscInt *ranges; 2602 2603 PetscFunctionBegin; 2604 MatCheckProduct(C,3); 2605 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2606 atb = (Mat_TransMatMultDense *)C->product->data; 2607 recvcounts = atb->recvcounts; 2608 sendbuf = atb->sendbuf; 2609 2610 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2611 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2612 2613 /* compute atbarray = aseq^T * bseq */ 2614 ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2615 2616 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2617 2618 /* arrange atbarray into sendbuf */ 2619 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2620 for (proc=0, k=0; proc<size; proc++) { 2621 for (j=0; j<cN; j++) { 2622 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2623 } 2624 } 2625 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2626 2627 /* sum all atbarray to local values of C */ 2628 ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 2629 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr); 2630 ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 2631 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2632 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2633 PetscFunctionReturn(0); 2634 } 2635 2636 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2637 { 2638 PetscErrorCode ierr; 2639 MPI_Comm comm; 2640 PetscMPIInt size; 2641 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2642 Mat_TransMatMultDense *atb; 2643 PetscBool cisdense; 2644 PetscInt i; 2645 const PetscInt *ranges; 2646 2647 PetscFunctionBegin; 2648 MatCheckProduct(C,3); 2649 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2650 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2651 if (A->rmap->rstart != B->rmap->rstart || 
A->rmap->rend != B->rmap->rend) { 2652 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2653 } 2654 2655 /* create matrix product C */ 2656 ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2657 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 2658 if (!cisdense) { 2659 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2660 } 2661 ierr = MatSetUp(C);CHKERRQ(ierr); 2662 2663 /* create data structure for reuse C */ 2664 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2665 ierr = PetscNew(&atb);CHKERRQ(ierr); 2666 cM = C->rmap->N; 2667 ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2668 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2669 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2670 2671 C->product->data = atb; 2672 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2673 PetscFunctionReturn(0); 2674 } 2675 2676 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2677 { 2678 PetscErrorCode ierr; 2679 MPI_Comm comm; 2680 PetscMPIInt i, size; 2681 PetscInt maxRows, bufsiz; 2682 PetscMPIInt tag; 2683 PetscInt alg; 2684 Mat_MatTransMultDense *abt; 2685 Mat_Product *product = C->product; 2686 PetscBool flg; 2687 2688 PetscFunctionBegin; 2689 MatCheckProduct(C,4); 2690 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2691 /* check local size of A and B */ 2692 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2693 2694 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2695 alg = flg ? 
0 : 1; 2696 2697 /* setup matrix product C */ 2698 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2699 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2700 ierr = MatSetUp(C);CHKERRQ(ierr); 2701 ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 2702 2703 /* create data structure for reuse C */ 2704 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2705 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2706 ierr = PetscNew(&abt);CHKERRQ(ierr); 2707 abt->tag = tag; 2708 abt->alg = alg; 2709 switch (alg) { 2710 case 1: /* alg: "cyclic" */ 2711 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2712 bufsiz = A->cmap->N * maxRows; 2713 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2714 break; 2715 default: /* alg: "allgatherv" */ 2716 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2717 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2718 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2719 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2720 break; 2721 } 2722 2723 C->product->data = abt; 2724 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2725 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2726 PetscFunctionReturn(0); 2727 } 2728 2729 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2730 { 2731 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2732 Mat_MatTransMultDense *abt; 2733 PetscErrorCode ierr; 2734 MPI_Comm comm; 2735 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2736 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2737 PetscInt i,cK=A->cmap->N,k,j,bn; 2738 
PetscScalar _DOne=1.0,_DZero=0.0; 2739 const PetscScalar *av,*bv; 2740 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2741 MPI_Request reqs[2]; 2742 const PetscInt *ranges; 2743 2744 PetscFunctionBegin; 2745 MatCheckProduct(C,3); 2746 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2747 abt = (Mat_MatTransMultDense*)C->product->data; 2748 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2749 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 2750 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2751 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2752 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2753 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2754 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2755 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2756 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2757 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2758 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2759 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2760 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2761 bn = B->rmap->n; 2762 if (blda == bn) { 2763 sendbuf = (PetscScalar*)bv; 2764 } else { 2765 sendbuf = abt->buf[0]; 2766 for (k = 0, i = 0; i < cK; i++) { 2767 for (j = 0; j < bn; j++, k++) { 2768 sendbuf[k] = bv[i * blda + j]; 2769 } 2770 } 2771 } 2772 if (size > 1) { 2773 sendto = (rank + size - 1) % size; 2774 recvfrom = (rank + size + 1) % size; 2775 } else { 2776 sendto = recvfrom = 0; 2777 } 2778 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2780 recvisfrom = rank; 2781 for (i = 0; i < size; i++) { 2782 /* we have finished receiving in sending, bufs can be read/modified */ 2783 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2784 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2785 2786 if (nextrecvisfrom != rank) { 2787 
/* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2788 sendsiz = cK * bn; 2789 recvsiz = cK * nextbn; 2790 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2791 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr); 2792 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr); 2793 } 2794 2795 /* local aseq * sendbuf^T */ 2796 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2797 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2798 2799 if (nextrecvisfrom != rank) { 2800 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2801 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 2802 } 2803 bn = nextbn; 2804 recvisfrom = nextrecvisfrom; 2805 sendbuf = recvbuf; 2806 } 2807 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2808 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2809 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2810 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 PetscFunctionReturn(0); 2813 } 2814 2815 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2816 { 2817 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2818 Mat_MatTransMultDense *abt; 2819 PetscErrorCode ierr; 2820 MPI_Comm comm; 2821 PetscMPIInt size; 2822 PetscScalar *cv, *sendbuf, *recvbuf; 2823 const PetscScalar *av,*bv; 2824 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2825 PetscScalar _DOne=1.0,_DZero=0.0; 2826 PetscBLASInt cm, cn, ck, alda, clda; 2827 2828 PetscFunctionBegin; 2829 MatCheckProduct(C,3); 2830 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product 
data empty"); 2831 abt = (Mat_MatTransMultDense*)C->product->data; 2832 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2833 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2834 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2835 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2836 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2837 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2838 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2839 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2840 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2841 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2842 /* copy transpose of B into buf[0] */ 2843 bn = B->rmap->n; 2844 sendbuf = abt->buf[0]; 2845 recvbuf = abt->buf[1]; 2846 for (k = 0, j = 0; j < bn; j++) { 2847 for (i = 0; i < cK; i++, k++) { 2848 sendbuf[k] = bv[i * blda + j]; 2849 } 2850 } 2851 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2852 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr); 2853 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2854 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2855 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2856 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2857 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2858 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2859 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2860 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2866 { 2867 Mat_MatTransMultDense *abt; 2868 PetscErrorCode ierr; 2869 2870 PetscFunctionBegin; 2871 MatCheckProduct(C,3); 2872 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2873 abt = (Mat_MatTransMultDense*)C->product->data; 2874 switch (abt->alg) { 2875 case 1: 2876 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2877 break; 2878 default: 2879 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2880 break; 2881 } 2882 PetscFunctionReturn(0); 2883 } 2884 2885 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2886 { 2887 PetscErrorCode ierr; 2888 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2889 2890 PetscFunctionBegin; 2891 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2892 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2893 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2894 ierr = PetscFree(ab);CHKERRQ(ierr); 2895 PetscFunctionReturn(0); 2896 } 2897 2898 #if defined(PETSC_HAVE_ELEMENTAL) 2899 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2900 { 2901 PetscErrorCode ierr; 2902 Mat_MatMultDense *ab; 2903 2904 PetscFunctionBegin; 2905 MatCheckProduct(C,3); 2906 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2907 ab = (Mat_MatMultDense*)C->product->data; 2908 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2909 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2910 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2911 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2912 PetscFunctionReturn(0); 2913 } 2914 2915 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2916 { 2917 PetscErrorCode ierr; 2918 Mat Ae,Be,Ce; 2919 Mat_MatMultDense *ab; 2920 2921 PetscFunctionBegin; 2922 MatCheckProduct(C,4); 2923 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2924 /* check local size of A 
and B */ 2925 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2926 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2927 } 2928 2929 /* create elemental matrices Ae and Be */ 2930 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2931 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2932 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2933 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2934 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2935 2936 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2937 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2938 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2939 ierr = MatSetUp(Be);CHKERRQ(ierr); 2940 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2941 2942 /* compute symbolic Ce = Ae*Be */ 2943 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2944 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2945 2946 /* setup C */ 2947 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2948 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2949 ierr = MatSetUp(C);CHKERRQ(ierr); 2950 2951 /* create data structure for reuse Cdense */ 2952 ierr = PetscNew(&ab);CHKERRQ(ierr); 2953 ab->Ae = Ae; 2954 ab->Be = Be; 2955 ab->Ce = Ce; 2956 2957 C->product->data = ab; 2958 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2959 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2960 PetscFunctionReturn(0); 2961 } 2962 #endif 2963 /* ----------------------------------------------- */ 2964 #if defined(PETSC_HAVE_ELEMENTAL) 2965 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2966 { 2967 PetscFunctionBegin; 2968 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2969 C->ops->productsymbolic = MatProductSymbolic_AB; 2970 PetscFunctionReturn(0); 2971 } 2972 #endif 2973 2974 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2975 { 2976 Mat_Product *product = C->product; 2977 Mat A = product->A,B=product->B; 2978 2979 PetscFunctionBegin; 2980 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2981 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2982 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2983 C->ops->productsymbolic = MatProductSymbolic_AtB; 2984 PetscFunctionReturn(0); 2985 } 2986 2987 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2988 { 2989 PetscErrorCode ierr; 2990 Mat_Product *product = C->product; 2991 const char *algTypes[2] = {"allgatherv","cyclic"}; 2992 PetscInt alg,nalg = 2; 2993 PetscBool flg = PETSC_FALSE; 2994 2995 PetscFunctionBegin; 2996 /* Set default algorithm */ 2997 alg = 0; /* default is allgatherv */ 2998 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2999 if (flg) { 3000 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 3001 } 3002 3003 /* Get runtime option */ 3004 if (product->api_user) { 3005 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 3006 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 3007 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3008 } else { 3009 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 3010 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic 
approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 3011 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3012 } 3013 if (flg) { 3014 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 3015 } 3016 3017 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 3018 C->ops->productsymbolic = MatProductSymbolic_ABt; 3019 PetscFunctionReturn(0); 3020 } 3021 3022 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 3023 { 3024 PetscErrorCode ierr; 3025 Mat_Product *product = C->product; 3026 3027 PetscFunctionBegin; 3028 switch (product->type) { 3029 #if defined(PETSC_HAVE_ELEMENTAL) 3030 case MATPRODUCT_AB: 3031 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 3032 break; 3033 #endif 3034 case MATPRODUCT_AtB: 3035 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 3036 break; 3037 case MATPRODUCT_ABt: 3038 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 3039 break; 3040 default: 3041 break; 3042 } 3043 PetscFunctionReturn(0); 3044 } 3045