1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else { 36 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); 37 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 47 PetscErrorCode ierr; 48 49 PetscFunctionBegin; 50 ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 57 PetscErrorCode ierr; 58 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 59 60 PetscFunctionBegin; 61 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 62 lrow = row - rstart; 63 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 68 { 69 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 70 PetscErrorCode ierr; 71 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 72 73 PetscFunctionBegin; 74 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 75 lrow = row - rstart; 76 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 81 { 82 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 83 PetscErrorCode ierr; 84 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 85 PetscScalar *array; 86 MPI_Comm comm; 87 PetscBool flg; 88 Mat B; 89 90 PetscFunctionBegin; 91 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 92 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 93 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 94 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 95 96 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 97 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 98 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 99 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 100 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 101 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 102 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 103 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 104 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 105 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 106 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 107 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 108 *a = B; 109 ierr = MatDestroy(&B);CHKERRQ(ierr); 110 } else *a = B; 111 PetscFunctionReturn(0); 112 } 113 114 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 115 { 116 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 117 PetscErrorCode ierr; 118 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 119 PetscBool roworiented = A->roworiented; 120 121 PetscFunctionBegin; 122 for (i=0; i<m; i++) { 123 if (idxm[i] < 0) continue; 124 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 125 if (idxm[i] >= rstart && idxm[i] < rend) { 126 row = idxm[i] - rstart; 127 if (roworiented) { 128 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 129 } else { 130 for (j=0; j<n; j++) { 131 if (idxn[j] < 0) continue; 132 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 133 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 134 } 135 } 136 } else if (!A->donotstash) { 137 mat->assembled = PETSC_FALSE; 138 if (roworiented) { 139 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 140 } else { 141 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 142 } 143 } 144 } 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 149 { 150 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 151 PetscErrorCode ierr; 152 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 153 154 PetscFunctionBegin; 155 for (i=0; i<m; i++) { 156 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 157 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 158 if (idxm[i] >= rstart && idxm[i] < rend) { 159 row = idxm[i] - rstart; 160 for (j=0; j<n; j++) { 161 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 162 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 163 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 164 } 165 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 166 } 167 PetscFunctionReturn(0); 168 } 169 170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 171 { 172 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 181 { 182 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 183 PetscBool iscuda; 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 if (!a->A) { 188 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 189 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 190 ierr = 
PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 191 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 192 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 193 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 194 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 195 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 196 } 197 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 219 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 230 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 235 { 236 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 if (a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 241 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 242 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 247 { 248 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 249 PetscErrorCode ierr; 250 251 PetscFunctionBegin; 252 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 253 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 254 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 259 { 260 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 265 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 266 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 271 { 272 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 273 PetscErrorCode ierr; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 284 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 285 if (comm_mat != comm_is) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 288 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 289 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 290 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 291 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 292 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! */ 297 298 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! 
*/ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 309 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 310 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 311 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 317 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 318 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 326 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 332 /* Free work space */ 333 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 334 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 335 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 360 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 371 { 372 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 373 PetscErrorCode ierr; 374 PetscInt nstash,reallocs; 375 376 PetscFunctionBegin; 377 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 378 379 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 380 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 381 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 386 { 387 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 388 PetscErrorCode ierr; 389 PetscInt i,*row,*col,flg,j,rstart,ncols; 390 PetscMPIInt n; 391 PetscScalar *val; 392 393 PetscFunctionBegin; 394 if (!mdn->donotstash && !mat->nooffprocentries) { 395 /* wait on receives */ 396 while (1) { 397 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 398 if (!flg) break; 399 400 for (i=0; i<n;) { 401 /* Now identify the consecutive vals belonging to the same row */ 402 for (j=i,rstart=row[j]; j<n; j++) { 403 if (row[j] != rstart) break; 404 } 405 if (j < n) ncols = j-i; 406 else ncols = n-i; 407 /* Now assemble all these values with a single function call */ 408 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 409 i = j; 410 } 411 } 412 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 413 } 414 415 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 417 418 if (!mat->was_assembled && mode == 
MAT_FINAL_ASSEMBLY) { 419 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 425 { 426 PetscErrorCode ierr; 427 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 428 429 PetscFunctionBegin; 430 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 437 PetscErrorCode ierr; 438 PetscInt i,len,*lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 443 /* fix right hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 449 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 450 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 451 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 452 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 453 } 454 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 455 if (diag != 0.0) { 456 Vec d; 457 458 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 459 ierr = VecSet(d,diag);CHKERRQ(ierr); 460 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecDestroy(&d);CHKERRQ(ierr); 462 } 463 ierr = PetscFree(lrows);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 const PetscScalar *ax; 477 PetscScalar *ay; 478 PetscMemType axmtype,aymtype; 479 
480 PetscFunctionBegin; 481 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 482 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 483 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr); 484 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 485 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 486 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 487 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 494 PetscErrorCode ierr; 495 const PetscScalar *ax; 496 PetscScalar *ay; 497 PetscMemType axmtype,aymtype; 498 499 PetscFunctionBegin; 500 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 501 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 502 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr); 503 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 504 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 505 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 506 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 513 PetscErrorCode ierr; 514 const PetscScalar *ax; 515 PetscScalar *ay; 516 PetscMemType axmtype,aymtype; 517 518 PetscFunctionBegin; 519 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 520 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 522 ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr); 523 ierr = 
PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 524 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 525 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 526 ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 PetscErrorCode ierr; 534 const PetscScalar *ax; 535 PetscScalar *ay; 536 PetscMemType axmtype,aymtype; 537 538 PetscFunctionBegin; 539 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 540 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 541 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 542 ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr); 543 ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 544 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 545 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 546 ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr); 547 PetscFunctionReturn(0); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 553 PetscErrorCode ierr; 554 PetscInt lda,len,i,n,m = A->rmap->n,radd; 555 PetscScalar *x,zero = 0.0; 556 const PetscScalar *av; 557 558 PetscFunctionBegin; 559 ierr = VecSet(v,zero);CHKERRQ(ierr); 560 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 561 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 562 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 563 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 564 radd = A->rmap->rstart*m; 565 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 566 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 567 for (i=0; i<len; i++) { 568 x[i] = av[radd + i*lda + i]; 569 } 570 ierr = 
MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 571 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatDestroy_MPIDense(Mat mat) 576 { 577 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 578 PetscErrorCode ierr; 579 580 PetscFunctionBegin; 581 #if defined(PETSC_USE_LOG) 582 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 583 #endif 584 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 585 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 586 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 587 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 588 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 589 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 590 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 591 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 592 593 ierr = PetscFree(mat->data);CHKERRQ(ierr); 594 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 595 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 604 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 607 #if defined(PETSC_HAVE_ELEMENTAL) 608 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 609 #endif 610 #if defined(PETSC_HAVE_SCALAPACK) 611 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 612 #endif 613 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 614 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 615 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 616 #if defined (PETSC_HAVE_CUDA) 617 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 618 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 619 #endif 620 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 621 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 622 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 623 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 624 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 625 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 626 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 627 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 628 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 634 635 #include <petscdraw.h> 636 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 637 { 638 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 639 PetscErrorCode ierr; 640 PetscMPIInt rank; 641 PetscViewerType vtype; 642 PetscBool iascii,isdraw; 643 PetscViewer sviewer; 644 PetscViewerFormat format; 645 646 PetscFunctionBegin; 647 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 648 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 649 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 650 if (iascii) { 651 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 652 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 653 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 654 MatInfo info; 655 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 656 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 657 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 658 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 660 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } else if (format == PETSC_VIEWER_ASCII_INFO) { 663 PetscFunctionReturn(0); 664 } 665 } else if (isdraw) { 666 PetscDraw draw; 667 PetscBool isnull; 668 669 ierr = 
PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 670 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 671 if (isnull) PetscFunctionReturn(0); 672 } 673 674 { 675 /* assemble the entire matrix onto first processor. */ 676 Mat A; 677 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 678 PetscInt *cols; 679 PetscScalar *vals; 680 681 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 682 if (!rank) { 683 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 684 } else { 685 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 686 } 687 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 688 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 689 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 690 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 691 692 /* Copy the matrix ... This isn't the most efficient means, 693 but it's quick for now */ 694 A->insertmode = INSERT_VALUES; 695 696 row = mat->rmap->rstart; 697 m = mdn->A->rmap->n; 698 for (i=0; i<m; i++) { 699 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 700 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 702 row++; 703 } 704 705 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 707 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 708 if (!rank) { 709 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 710 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 711 } 712 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 713 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 714 ierr = MatDestroy(&A);CHKERRQ(ierr); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 720 { 721 PetscErrorCode ierr; 722 PetscBool iascii,isbinary,isdraw,issocket; 723 724 PetscFunctionBegin; 725 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 726 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 727 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 728 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 729 730 if (iascii || issocket || isdraw) { 731 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 732 } else if (isbinary) { 733 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 734 } 735 PetscFunctionReturn(0); 736 } 737 738 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 739 { 740 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 741 Mat mdn = mat->A; 742 PetscErrorCode ierr; 743 PetscLogDouble isend[5],irecv[5]; 744 745 PetscFunctionBegin; 746 info->block_size = 1.0; 747 748 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 749 750 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 751 isend[3] = info->memory; isend[4] = info->mallocs; 752 if (flag == MAT_LOCAL) { 753 info->nz_used = isend[0]; 754 info->nz_allocated = isend[1]; 755 info->nz_unneeded = isend[2]; 756 info->memory = isend[3]; 757 info->mallocs = isend[4]; 758 } else if (flag == MAT_GLOBAL_MAX) { 759 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 760 761 info->nz_used = irecv[0]; 762 info->nz_allocated = irecv[1]; 763 info->nz_unneeded = irecv[2]; 764 info->memory = irecv[3]; 765 info->mallocs = irecv[4]; 766 } else if (flag == MAT_GLOBAL_SUM) { 767 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 768 769 info->nz_used = irecv[0]; 770 
info->nz_allocated = irecv[1]; 771 info->nz_unneeded = irecv[2]; 772 info->memory = irecv[3]; 773 info->mallocs = irecv[4]; 774 } 775 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 776 info->fill_ratio_needed = 0; 777 info->factor_mallocs = 0; 778 PetscFunctionReturn(0); 779 } 780 781 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 782 { 783 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 784 PetscErrorCode ierr; 785 786 PetscFunctionBegin; 787 switch (op) { 788 case MAT_NEW_NONZERO_LOCATIONS: 789 case MAT_NEW_NONZERO_LOCATION_ERR: 790 case MAT_NEW_NONZERO_ALLOCATION_ERR: 791 MatCheckPreallocated(A,1); 792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 793 break; 794 case MAT_ROW_ORIENTED: 795 MatCheckPreallocated(A,1); 796 a->roworiented = flg; 797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 798 break; 799 case MAT_FORCE_DIAGONAL_ENTRIES: 800 case MAT_KEEP_NONZERO_PATTERN: 801 case MAT_USE_HASH_TABLE: 802 case MAT_SORTED_FULL: 803 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 804 break; 805 case MAT_IGNORE_OFF_PROC_ENTRIES: 806 a->donotstash = flg; 807 break; 808 case MAT_SYMMETRIC: 809 case MAT_STRUCTURALLY_SYMMETRIC: 810 case MAT_HERMITIAN: 811 case MAT_SYMMETRY_ETERNAL: 812 case MAT_IGNORE_LOWER_TRIANGULAR: 813 case MAT_IGNORE_ZERO_ENTRIES: 814 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 815 break; 816 default: 817 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 818 } 819 PetscFunctionReturn(0); 820 } 821 822 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 823 { 824 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 825 const PetscScalar *l; 826 PetscScalar x,*v,*vv,*r; 827 PetscErrorCode ierr; 828 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 829 830 PetscFunctionBegin; 831 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 832 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 833 ierr = 
MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 834 if (ll) { 835 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 836 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 837 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 838 for (i=0; i<m; i++) { 839 x = l[i]; 840 v = vv + i; 841 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 842 } 843 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 844 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 845 } 846 if (rr) { 847 const PetscScalar *ar; 848 849 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 850 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 851 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 852 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 853 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 854 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 855 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 856 for (i=0; i<n; i++) { 857 x = r[i]; 858 v = vv + i*lda; 859 for (j=0; j<m; j++) (*v++) *= x; 860 } 861 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 862 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 863 } 864 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 869 { 870 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 871 PetscErrorCode ierr; 872 PetscInt i,j; 873 PetscMPIInt size; 874 PetscReal sum = 0.0; 875 const PetscScalar *av,*v; 876 877 PetscFunctionBegin; 878 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 879 v = av; 880 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 881 if (size == 1) { 882 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 883 } else { 884 if (type == NORM_FROBENIUS) { 885 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 886 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 887 } 
888 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 889 *nrm = PetscSqrtReal(*nrm); 890 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 891 } else if (type == NORM_1) { 892 PetscReal *tmp,*tmp2; 893 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 894 *nrm = 0.0; 895 v = av; 896 for (j=0; j<mdn->A->cmap->n; j++) { 897 for (i=0; i<mdn->A->rmap->n; i++) { 898 tmp[j] += PetscAbsScalar(*v); v++; 899 } 900 } 901 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 902 for (j=0; j<A->cmap->N; j++) { 903 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 904 } 905 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 906 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 907 } else if (type == NORM_INFINITY) { /* max row norm */ 908 PetscReal ntemp; 909 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 910 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 911 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 912 } 913 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 914 PetscFunctionReturn(0); 915 } 916 917 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 918 { 919 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 920 Mat B; 921 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 922 PetscErrorCode ierr; 923 PetscInt j,i,lda; 924 PetscScalar *v; 925 926 PetscFunctionBegin; 927 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 928 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 929 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 930 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 931 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 932 } else B = *matout; 933 934 m = a->A->rmap->n; n = a->A->cmap->n; 935 ierr = 
MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 936 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 937 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 938 for (i=0; i<m; i++) rwork[i] = rstart + i; 939 for (j=0; j<n; j++) { 940 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 941 v += lda; 942 } 943 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 944 ierr = PetscFree(rwork);CHKERRQ(ierr); 945 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 946 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 947 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 948 *matout = B; 949 } else { 950 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 956 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 957 958 PetscErrorCode MatSetUp_MPIDense(Mat A) 959 { 960 PetscErrorCode ierr; 961 962 PetscFunctionBegin; 963 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 964 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 965 if (!A->preallocated) { 966 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 967 } 968 PetscFunctionReturn(0); 969 } 970 971 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 972 { 973 PetscErrorCode ierr; 974 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 975 976 PetscFunctionBegin; 977 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatConjugate_MPIDense(Mat mat) 982 { 983 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 984 PetscErrorCode ierr; 985 986 PetscFunctionBegin; 987 ierr = MatConjugate(a->A);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatRealPart_MPIDense(Mat A) 992 { 993 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1002 { 1003 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 1012 { 1013 PetscErrorCode ierr; 1014 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1015 1016 PetscFunctionBegin; 1017 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 1018 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 1019 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1024 1025 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1026 { 1027 PetscErrorCode ierr; 1028 PetscInt i,n; 1029 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1030 PetscReal *work; 1031 1032 PetscFunctionBegin; 1033 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1034 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1035 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1036 if (type == NORM_2) { 1037 for (i=0; i<n; i++) work[i] *= work[i]; 1038 } 1039 if (type == NORM_INFINITY) { 1040 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1041 } else { 1042 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1043 } 1044 ierr = PetscFree(work);CHKERRQ(ierr); 1045 if (type == NORM_2) { 1046 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #if defined(PETSC_HAVE_CUDA) 1052 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1053 { 1054 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 
1055 PetscErrorCode ierr; 1056 PetscInt lda; 1057 1058 PetscFunctionBegin; 1059 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1060 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1061 if (!a->cvec) { 1062 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1063 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1064 } 1065 a->vecinuse = col + 1; 1066 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1067 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1068 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1069 *v = a->cvec; 1070 PetscFunctionReturn(0); 1071 } 1072 1073 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1074 { 1075 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1076 PetscErrorCode ierr; 1077 1078 PetscFunctionBegin; 1079 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1080 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1081 a->vecinuse = 0; 1082 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1083 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1084 *v = NULL; 1085 PetscFunctionReturn(0); 1086 } 1087 1088 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1089 { 1090 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1091 PetscInt lda; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1096 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1097 
if (!a->cvec) { 1098 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1099 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1100 } 1101 a->vecinuse = col + 1; 1102 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1103 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1104 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1105 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1106 *v = a->cvec; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1111 { 1112 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1117 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1118 a->vecinuse = 0; 1119 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1120 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1121 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1122 *v = NULL; 1123 PetscFunctionReturn(0); 1124 } 1125 1126 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1127 { 1128 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1129 PetscErrorCode ierr; 1130 PetscInt lda; 1131 1132 PetscFunctionBegin; 1133 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1134 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1135 if (!a->cvec) { 1136 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1137 ierr = 
PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1138 } 1139 a->vecinuse = col + 1; 1140 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1141 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1142 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1143 *v = a->cvec; 1144 PetscFunctionReturn(0); 1145 } 1146 1147 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1148 { 1149 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1154 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1155 a->vecinuse = 0; 1156 ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1157 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1158 *v = NULL; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1163 { 1164 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1165 PetscErrorCode ierr; 1166 1167 PetscFunctionBegin; 1168 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1169 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1170 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1175 { 1176 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1181 if (l->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1182 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1187 { 1188 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1193 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1194 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1199 { 1200 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1209 { 1210 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1219 { 1220 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1229 { 1230 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 
1237 1238 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1239 { 1240 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1249 { 1250 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1251 PetscErrorCode ierr; 1252 1253 PetscFunctionBegin; 1254 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1259 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1260 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1261 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1262 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1263 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1264 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1265 1266 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1267 { 1268 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1269 PetscErrorCode ierr; 1270 1271 PetscFunctionBegin; 1272 if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1273 if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1274 if (d->A) { 1275 ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1276 } 1277 mat->boundtocpu = bind; 1278 if (!bind) { 1279 PetscBool iscuda; 1280 1281 ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1282 if (!iscuda) { 1283 ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1284 } 1285 ierr = 
PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1286 if (!iscuda) { 1287 ierr = MatDestroy(&d->cmat);CHKERRQ(ierr); 1288 } 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1295 } else { 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1302 } 1303 if (d->cmat) { 1304 ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 PetscErrorCode 
MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1310 { 1311 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1312 PetscErrorCode ierr; 1313 PetscBool iscuda; 1314 1315 PetscFunctionBegin; 1316 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1317 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1318 if (!iscuda) PetscFunctionReturn(0); 1319 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1320 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1321 if (!d->A) { 1322 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1323 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1324 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1325 } 1326 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1327 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1328 A->preallocated = PETSC_TRUE; 1329 PetscFunctionReturn(0); 1330 } 1331 #endif 1332 1333 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1334 { 1335 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1344 { 1345 PetscFunctionBegin; 1346 *missing = PETSC_FALSE; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1351 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1352 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1353 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1354 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1355 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1356 1357 /* 
-------------------------------------------------------------------*/ 1358 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1359 MatGetRow_MPIDense, 1360 MatRestoreRow_MPIDense, 1361 MatMult_MPIDense, 1362 /* 4*/ MatMultAdd_MPIDense, 1363 MatMultTranspose_MPIDense, 1364 MatMultTransposeAdd_MPIDense, 1365 NULL, 1366 NULL, 1367 NULL, 1368 /* 10*/ NULL, 1369 NULL, 1370 NULL, 1371 NULL, 1372 MatTranspose_MPIDense, 1373 /* 15*/ MatGetInfo_MPIDense, 1374 MatEqual_MPIDense, 1375 MatGetDiagonal_MPIDense, 1376 MatDiagonalScale_MPIDense, 1377 MatNorm_MPIDense, 1378 /* 20*/ MatAssemblyBegin_MPIDense, 1379 MatAssemblyEnd_MPIDense, 1380 MatSetOption_MPIDense, 1381 MatZeroEntries_MPIDense, 1382 /* 24*/ MatZeroRows_MPIDense, 1383 NULL, 1384 NULL, 1385 NULL, 1386 NULL, 1387 /* 29*/ MatSetUp_MPIDense, 1388 NULL, 1389 NULL, 1390 MatGetDiagonalBlock_MPIDense, 1391 NULL, 1392 /* 34*/ MatDuplicate_MPIDense, 1393 NULL, 1394 NULL, 1395 NULL, 1396 NULL, 1397 /* 39*/ MatAXPY_MPIDense, 1398 MatCreateSubMatrices_MPIDense, 1399 NULL, 1400 MatGetValues_MPIDense, 1401 MatCopy_MPIDense, 1402 /* 44*/ NULL, 1403 MatScale_MPIDense, 1404 MatShift_Basic, 1405 NULL, 1406 NULL, 1407 /* 49*/ MatSetRandom_MPIDense, 1408 NULL, 1409 NULL, 1410 NULL, 1411 NULL, 1412 /* 54*/ NULL, 1413 NULL, 1414 NULL, 1415 NULL, 1416 NULL, 1417 /* 59*/ MatCreateSubMatrix_MPIDense, 1418 MatDestroy_MPIDense, 1419 MatView_MPIDense, 1420 NULL, 1421 NULL, 1422 /* 64*/ NULL, 1423 NULL, 1424 NULL, 1425 NULL, 1426 NULL, 1427 /* 69*/ NULL, 1428 NULL, 1429 NULL, 1430 NULL, 1431 NULL, 1432 /* 74*/ NULL, 1433 NULL, 1434 NULL, 1435 NULL, 1436 NULL, 1437 /* 79*/ NULL, 1438 NULL, 1439 NULL, 1440 NULL, 1441 /* 83*/ MatLoad_MPIDense, 1442 NULL, 1443 NULL, 1444 NULL, 1445 NULL, 1446 NULL, 1447 /* 89*/ NULL, 1448 NULL, 1449 NULL, 1450 NULL, 1451 NULL, 1452 /* 94*/ NULL, 1453 NULL, 1454 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1455 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1456 NULL, 1457 /* 99*/ 
MatProductSetFromOptions_MPIDense, 1458 NULL, 1459 NULL, 1460 MatConjugate_MPIDense, 1461 NULL, 1462 /*104*/ NULL, 1463 MatRealPart_MPIDense, 1464 MatImaginaryPart_MPIDense, 1465 NULL, 1466 NULL, 1467 /*109*/ NULL, 1468 NULL, 1469 NULL, 1470 MatGetColumnVector_MPIDense, 1471 MatMissingDiagonal_MPIDense, 1472 /*114*/ NULL, 1473 NULL, 1474 NULL, 1475 NULL, 1476 NULL, 1477 /*119*/ NULL, 1478 NULL, 1479 NULL, 1480 NULL, 1481 NULL, 1482 /*124*/ NULL, 1483 MatGetColumnNorms_MPIDense, 1484 NULL, 1485 NULL, 1486 NULL, 1487 /*129*/ NULL, 1488 NULL, 1489 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1490 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1491 NULL, 1492 /*134*/ NULL, 1493 NULL, 1494 NULL, 1495 NULL, 1496 NULL, 1497 /*139*/ NULL, 1498 NULL, 1499 NULL, 1500 NULL, 1501 NULL, 1502 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1503 /*145*/ NULL, 1504 NULL, 1505 NULL 1506 }; 1507 1508 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1509 { 1510 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1511 PetscBool iscuda; 1512 PetscErrorCode ierr; 1513 1514 PetscFunctionBegin; 1515 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1516 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1517 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1518 if (!a->A) { 1519 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1520 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1521 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1522 } 1523 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1524 ierr = MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1525 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1526 mat->preallocated = PETSC_TRUE; 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #if defined(PETSC_HAVE_ELEMENTAL) 1531 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1532 { 1533 Mat mat_elemental; 1534 PetscErrorCode ierr; 1535 PetscScalar *v; 1536 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1537 1538 PetscFunctionBegin; 1539 if (reuse == MAT_REUSE_MATRIX) { 1540 mat_elemental = *newmat; 1541 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1542 } else { 1543 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1544 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1545 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1546 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1547 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1548 } 1549 1550 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1551 for (i=0; i<N; i++) cols[i] = i; 1552 for (i=0; i<m; i++) rows[i] = rstart + i; 1553 1554 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1555 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1556 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1557 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1558 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1559 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1560 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1561 1562 if (reuse == MAT_INPLACE_MATRIX) { 1563 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1564 } else { 1565 *newmat = mat_elemental; 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 #endif 1570 1571 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt 
col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Columns are not partitioned across ranks, so the request maps directly onto the local dense block */
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release access to a column obtained with MatDenseGetColumn_MPIDense() by forwarding to the local block */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Concatenate per-rank sequential dense matrices (inmat) into one parallel dense matrix on comm.
   n is the number of local columns (or PETSC_DECIDE); scall selects symbolic creation vs numeric reuse. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
    /* every rank writes only its own rows, so stashing off-process entries is unnecessary */
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix back to plain MATMPIDENSE: bind to CPU, restore the standard
   vector type, remove the CUDA-only composed methods, and convert the local block to MATSEQDENSE. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat is M itself and is retyped below */

  B = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* composing NULL removes the CUDA-specific methods installed by the mpidensecuda type */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA: switch the default vector type to CUDA,
   install the CUDA-specific composed methods, and convert the local block to MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    /* local block not yet allocated; nothing resides on either host or device */
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Expose column col of the local block as an MPI Vec with read/write access.
   The single cached vector a->cvec is reused; a->vecinuse guards against nested access. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; the array is placed per call below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so col 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Counterpart of MatDenseGetColumnVec_MPIDense(): restore the array and detach it from the cached vector */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); additionally read-locks the vector */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Counterpart of MatDenseGetColumnVecRead_MPIDense(): pop the read lock, restore, detach */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Counterpart of MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Expose columns [cbegin,cend) as a parallel dense matrix sharing A's storage.
   The single cached matrix a->cmat is reused; its column layout is rebuilt when the width changes. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* rows keep A's layout; only the column layout is specific to the submatrix */
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* column count changed since last call: rebuild the column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* cbegin+1 so a cbegin of 0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Counterpart of MatDenseGetSubMatrix_MPIDense(): return the borrowed local block */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
.
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense()

M*/
/* Type constructor for MATMPIDENSE: allocates the implementation struct, installs the
   function table, and composes the dense-specific methods on the object. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dispatch targets looked up by the public MatDenseXXX() wrappers via PetscUseMethod/PetscTryMethod */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
.
-mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: build the CPU type first, then convert in place to CUDA */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
.  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* PetscTryMethod: silently a no-op when B is not an MPI dense type */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed host array is now authoritative; any device copy is stale */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed device array is now authoritative; any host copy is stale */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the device copy was just written; mark it as the up-to-date one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU.
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2289 2290 Level: developer 2291 2292 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2293 @*/ 2294 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2295 { 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2300 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 /*@C 2305 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2306 2307 Not Collective 2308 2309 Input Parameters: 2310 + A - the matrix 2311 - array - the GPU array in column major order 2312 2313 Notes: 2314 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2315 2316 Level: developer 2317 2318 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2319 @*/ 2320 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2321 { 2322 PetscErrorCode ierr; 2323 2324 PetscFunctionBegin; 2325 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 /*@C 2330 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2331 2332 Not Collective 2333 2334 Input Parameters: 2335 . A - the matrix 2336 2337 Output Parameters 2338 . array - the GPU array in column major order 2339 2340 Notes: 2341 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the device copy may have been written; mark it as the up-to-date one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* collective decision: if ANY rank supplied data, treat the matrix as already assembled */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* unlike MatCreateDense(), data is only checked locally here; the validation below
     errors (in debug builds) if ranks disagree instead of allreducing the flag */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicate an MPI dense matrix, optionally copying values (cpvalues), sharing A's layouts */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  /* the duplicate is created fully set up; no assembly phase is needed */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the row/column layouts instead of copying them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a dense matrix from a binary or (optionally) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Two parallel dense matrices are equal iff every rank's local block is equal (global AND reduction) */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy callback for the reusable A^T*B product data (see MatTransposeMatMultSymbolic_MPIDense_MPIDense) */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy callback for the reusable A*B^T product data (see MatMatTransposeMultSymbolic_MPIDense_MPIDense) */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T * B: local seq products reduced-scattered into C's row distribution */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data)
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, its rows of every column, contiguously */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T * B: size C, allocate the reduce-scatter buffers, attach product data */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full cM x cN local product; size_t cast guards against PetscInt overflow */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A * B^T: choose algorithm (allgatherv or cyclic), size C, allocate buffers */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", alg 1 = "cyclic" */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — send local B^T, receive the fully gathered B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A * B^T, "cyclic" algorithm: pass row blocks of B around the ring,
   overlapping communication with the local GEMM */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
PetscScalar _DOne=1.0,_DZero=0.0; 2740 const PetscScalar *av,*bv; 2741 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2742 MPI_Request reqs[2]; 2743 const PetscInt *ranges; 2744 2745 PetscFunctionBegin; 2746 MatCheckProduct(C,3); 2747 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2748 abt = (Mat_MatTransMultDense*)C->product->data; 2749 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2750 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 2751 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2752 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2753 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2754 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2755 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2756 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2757 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2758 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2759 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2760 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2761 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2762 bn = B->rmap->n; 2763 if (blda == bn) { 2764 sendbuf = (PetscScalar*)bv; 2765 } else { 2766 sendbuf = abt->buf[0]; 2767 for (k = 0, i = 0; i < cK; i++) { 2768 for (j = 0; j < bn; j++, k++) { 2769 sendbuf[k] = bv[i * blda + j]; 2770 } 2771 } 2772 } 2773 if (size > 1) { 2774 sendto = (rank + size - 1) % size; 2775 recvfrom = (rank + size + 1) % size; 2776 } else { 2777 sendto = recvfrom = 0; 2778 } 2779 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2781 recvisfrom = rank; 2782 for (i = 0; i < size; i++) { 2783 /* we have finished receiving in sending, bufs can be read/modified */ 2784 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2785 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2786 2787 if (nextrecvisfrom != rank) { 2788 
/* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2789 sendsiz = cK * bn; 2790 recvsiz = cK * nextbn; 2791 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2792 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr); 2793 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr); 2794 } 2795 2796 /* local aseq * sendbuf^T */ 2797 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2798 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2799 2800 if (nextrecvisfrom != rank) { 2801 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2802 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 2803 } 2804 bn = nextbn; 2805 recvisfrom = nextrecvisfrom; 2806 sendbuf = recvbuf; 2807 } 2808 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2809 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2810 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2811 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2813 PetscFunctionReturn(0); 2814 } 2815 2816 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2817 { 2818 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2819 Mat_MatTransMultDense *abt; 2820 PetscErrorCode ierr; 2821 MPI_Comm comm; 2822 PetscMPIInt size; 2823 PetscScalar *cv, *sendbuf, *recvbuf; 2824 const PetscScalar *av,*bv; 2825 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2826 PetscScalar _DOne=1.0,_DZero=0.0; 2827 PetscBLASInt cm, cn, ck, alda, clda; 2828 2829 PetscFunctionBegin; 2830 MatCheckProduct(C,3); 2831 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product 
data empty"); 2832 abt = (Mat_MatTransMultDense*)C->product->data; 2833 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2834 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2835 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2836 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2837 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2838 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2839 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2840 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2841 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2842 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2843 /* copy transpose of B into buf[0] */ 2844 bn = B->rmap->n; 2845 sendbuf = abt->buf[0]; 2846 recvbuf = abt->buf[1]; 2847 for (k = 0, j = 0; j < bn; j++) { 2848 for (i = 0; i < cK; i++, k++) { 2849 sendbuf[k] = bv[i * blda + j]; 2850 } 2851 } 2852 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2853 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr); 2854 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2855 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2856 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2857 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2858 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2859 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2860 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2861 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2867 { 2868 Mat_MatTransMultDense *abt; 2869 PetscErrorCode ierr; 2870 2871 PetscFunctionBegin; 2872 MatCheckProduct(C,3); 2873 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2874 abt = (Mat_MatTransMultDense*)C->product->data; 2875 switch (abt->alg) { 2876 case 1: 2877 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2878 break; 2879 default: 2880 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2881 break; 2882 } 2883 PetscFunctionReturn(0); 2884 } 2885 2886 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2887 { 2888 PetscErrorCode ierr; 2889 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2890 2891 PetscFunctionBegin; 2892 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2893 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2894 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2895 ierr = PetscFree(ab);CHKERRQ(ierr); 2896 PetscFunctionReturn(0); 2897 } 2898 2899 #if defined(PETSC_HAVE_ELEMENTAL) 2900 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2901 { 2902 PetscErrorCode ierr; 2903 Mat_MatMultDense *ab; 2904 2905 PetscFunctionBegin; 2906 MatCheckProduct(C,3); 2907 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2908 ab = (Mat_MatMultDense*)C->product->data; 2909 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2910 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2911 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2912 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2913 PetscFunctionReturn(0); 2914 } 2915 2916 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2917 { 2918 PetscErrorCode ierr; 2919 Mat Ae,Be,Ce; 2920 Mat_MatMultDense *ab; 2921 2922 PetscFunctionBegin; 2923 MatCheckProduct(C,4); 2924 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2925 /* check local size of A 
and B */ 2926 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2927 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2928 } 2929 2930 /* create elemental matrices Ae and Be */ 2931 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2932 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2933 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2934 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2935 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2936 2937 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2938 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2939 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2940 ierr = MatSetUp(Be);CHKERRQ(ierr); 2941 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2942 2943 /* compute symbolic Ce = Ae*Be */ 2944 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2945 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2946 2947 /* setup C */ 2948 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2949 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2950 ierr = MatSetUp(C);CHKERRQ(ierr); 2951 2952 /* create data structure for reuse Cdense */ 2953 ierr = PetscNew(&ab);CHKERRQ(ierr); 2954 ab->Ae = Ae; 2955 ab->Be = Be; 2956 ab->Ce = Ce; 2957 2958 C->product->data = ab; 2959 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2960 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2961 PetscFunctionReturn(0); 2962 } 2963 #endif 2964 /* ----------------------------------------------- */ 2965 #if defined(PETSC_HAVE_ELEMENTAL) 2966 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2967 { 2968 PetscFunctionBegin; 2969 
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Install the AtB product kernels on C, after checking A and B share a row distribution */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

/* Install the ABt product kernels on C, selecting the algorithm from the options database */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option: the option name depends on which API created the product */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Entry point: route MatProductSetFromOptions to the handler for the requested product type.
   Unsupported types fall through silently so other backends can be tried. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}