1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else { 36 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); 37 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 47 PetscErrorCode ierr; 48 49 PetscFunctionBegin; 50 ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 57 PetscErrorCode ierr; 58 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 59 60 PetscFunctionBegin; 61 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 62 lrow = row - rstart; 63 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 68 { 69 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 70 PetscErrorCode ierr; 71 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 72 73 PetscFunctionBegin; 74 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 75 lrow = row - rstart; 76 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 81 { 82 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 83 PetscErrorCode ierr; 84 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 85 PetscScalar *array; 86 MPI_Comm comm; 87 PetscBool flg; 88 Mat B; 89 90 PetscFunctionBegin; 91 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 92 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 93 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 94 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 95 96 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 97 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 98 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 99 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 100 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 101 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 102 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 103 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 104 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 105 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 106 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 107 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 108 *a = B; 109 ierr = MatDestroy(&B);CHKERRQ(ierr); 110 } else *a = B; 111 PetscFunctionReturn(0); 112 } 113 114 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 115 { 116 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 117 PetscErrorCode ierr; 118 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 119 PetscBool roworiented = A->roworiented; 120 121 PetscFunctionBegin; 122 for (i=0; i<m; i++) { 123 if (idxm[i] < 0) continue; 124 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 125 if (idxm[i] >= rstart && idxm[i] < rend) { 126 row = idxm[i] - rstart; 127 if (roworiented) { 128 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 129 } else { 130 for (j=0; j<n; j++) { 131 if (idxn[j] < 0) continue; 132 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 133 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 134 } 135 } 136 } else if (!A->donotstash) { 137 mat->assembled = PETSC_FALSE; 138 if (roworiented) { 139 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 140 } else { 141 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 142 } 143 } 144 } 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 149 { 150 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 151 PetscErrorCode ierr; 152 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 153 154 PetscFunctionBegin; 155 for (i=0; i<m; i++) { 156 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 157 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 158 if (idxm[i] >= rstart && idxm[i] < rend) { 159 row = idxm[i] - rstart; 160 for (j=0; j<n; j++) { 161 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 162 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 163 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 164 } 165 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 166 } 167 PetscFunctionReturn(0); 168 } 169 170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 171 { 172 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 181 { 182 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 183 PetscBool iscuda; 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 if (!a->A) { 188 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 189 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 190 ierr = 
PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 191 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 192 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 193 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 194 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 195 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 196 } 197 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 219 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 230 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 235 { 236 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 if (a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 241 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 242 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 247 { 248 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 249 PetscErrorCode ierr; 250 251 PetscFunctionBegin; 252 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 253 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 254 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 259 { 260 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 265 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 266 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 271 { 272 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 273 PetscErrorCode ierr; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 284 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 285 if (comm_mat != comm_is) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 288 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 289 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 290 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 291 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 292 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! */ 297 298 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! 
*/ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 309 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 310 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 311 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 317 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 318 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 326 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 332 /* Free work space */ 333 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 334 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 335 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 360 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 371 { 372 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 373 PetscErrorCode ierr; 374 PetscInt nstash,reallocs; 375 376 PetscFunctionBegin; 377 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 378 379 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 380 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 381 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 386 { 387 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 388 PetscErrorCode ierr; 389 PetscInt i,*row,*col,flg,j,rstart,ncols; 390 PetscMPIInt n; 391 PetscScalar *val; 392 393 PetscFunctionBegin; 394 if (!mdn->donotstash && !mat->nooffprocentries) { 395 /* wait on receives */ 396 while (1) { 397 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 398 if (!flg) break; 399 400 for (i=0; i<n;) { 401 /* Now identify the consecutive vals belonging to the same row */ 402 for (j=i,rstart=row[j]; j<n; j++) { 403 if (row[j] != rstart) break; 404 } 405 if (j < n) ncols = j-i; 406 else ncols = n-i; 407 /* Now assemble all these values with a single function call */ 408 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 409 i = j; 410 } 411 } 412 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 413 } 414 415 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 417 418 if (!mat->was_assembled && mode == 
MAT_FINAL_ASSEMBLY) { 419 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 425 { 426 PetscErrorCode ierr; 427 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 428 429 PetscFunctionBegin; 430 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 437 PetscErrorCode ierr; 438 PetscInt i,len,*lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 443 /* fix right hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 449 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 450 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 451 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 452 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 453 } 454 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 455 if (diag != 0.0) { 456 Vec d; 457 458 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 459 ierr = VecSet(d,diag);CHKERRQ(ierr); 460 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecDestroy(&d);CHKERRQ(ierr); 462 } 463 ierr = PetscFree(lrows);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 const PetscScalar *ax; 477 PetscScalar *ay; 478 PetscMemType axmtype,aymtype; 479 
480 PetscFunctionBegin; 481 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 482 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 483 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr); 484 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr); 485 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 486 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 487 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 494 PetscErrorCode ierr; 495 const PetscScalar *ax; 496 PetscScalar *ay; 497 PetscMemType axmtype,aymtype; 498 499 PetscFunctionBegin; 500 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 501 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 502 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr); 503 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr); 504 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 505 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 506 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 513 PetscErrorCode ierr; 514 const PetscScalar *ax; 515 PetscScalar *ay; 516 PetscMemType axmtype,aymtype; 517 518 PetscFunctionBegin; 519 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 520 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 522 ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr); 523 ierr = 
PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 524 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 525 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 526 ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 PetscErrorCode ierr; 534 const PetscScalar *ax; 535 PetscScalar *ay; 536 PetscMemType axmtype,aymtype; 537 538 PetscFunctionBegin; 539 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 540 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 541 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 542 ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr); 543 ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 544 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 545 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 546 ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr); 547 PetscFunctionReturn(0); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 553 PetscErrorCode ierr; 554 PetscInt lda,len,i,n,m = A->rmap->n,radd; 555 PetscScalar *x,zero = 0.0; 556 const PetscScalar *av; 557 558 PetscFunctionBegin; 559 ierr = VecSet(v,zero);CHKERRQ(ierr); 560 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 561 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 562 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 563 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 564 radd = A->rmap->rstart*m; 565 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 566 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 567 for (i=0; i<len; i++) { 568 x[i] = av[radd + i*lda + i]; 569 } 570 ierr = 
MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 571 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatDestroy_MPIDense(Mat mat) 576 { 577 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 578 PetscErrorCode ierr; 579 580 PetscFunctionBegin; 581 #if defined(PETSC_USE_LOG) 582 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 583 #endif 584 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 585 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 586 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 587 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 588 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 589 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 590 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 591 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 592 593 ierr = PetscFree(mat->data);CHKERRQ(ierr); 594 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 595 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 604 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 608 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 609 #if defined(PETSC_HAVE_ELEMENTAL) 610 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 611 #endif 612 #if defined(PETSC_HAVE_SCALAPACK) 613 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 614 #endif 615 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 616 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 617 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 618 #if defined (PETSC_HAVE_CUDA) 619 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 620 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 621 #endif 622 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 623 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 624 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 625 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 626 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 627 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 628 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 630 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 631 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 636 637 #include <petscdraw.h> 638 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 639 { 640 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 641 PetscErrorCode ierr; 642 PetscMPIInt rank; 643 PetscViewerType vtype; 644 PetscBool iascii,isdraw; 645 PetscViewer sviewer; 646 PetscViewerFormat format; 647 648 PetscFunctionBegin; 649 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 650 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 651 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 652 if (iascii) { 653 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 654 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 655 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 656 MatInfo info; 657 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 658 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 659 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 660 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 661 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 662 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 663 
PetscFunctionReturn(0); 664 } else if (format == PETSC_VIEWER_ASCII_INFO) { 665 PetscFunctionReturn(0); 666 } 667 } else if (isdraw) { 668 PetscDraw draw; 669 PetscBool isnull; 670 671 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 672 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 673 if (isnull) PetscFunctionReturn(0); 674 } 675 676 { 677 /* assemble the entire matrix onto first processor. */ 678 Mat A; 679 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 680 PetscInt *cols; 681 PetscScalar *vals; 682 683 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 684 if (!rank) { 685 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 686 } else { 687 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 688 } 689 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 690 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 691 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 692 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 693 694 /* Copy the matrix ... 
This isn't the most efficient means, 695 but it's quick for now */ 696 A->insertmode = INSERT_VALUES; 697 698 row = mat->rmap->rstart; 699 m = mdn->A->rmap->n; 700 for (i=0; i<m; i++) { 701 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 702 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 703 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 704 row++; 705 } 706 707 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 709 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 715 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 716 ierr = MatDestroy(&A);CHKERRQ(ierr); 717 } 718 PetscFunctionReturn(0); 719 } 720 721 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 722 { 723 PetscErrorCode ierr; 724 PetscBool iascii,isbinary,isdraw,issocket; 725 726 PetscFunctionBegin; 727 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 728 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 729 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 731 732 if (iascii || issocket || isdraw) { 733 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 734 } else if (isbinary) { 735 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 741 { 742 Mat_MPIDense *mat = 
(Mat_MPIDense*)A->data; 743 Mat mdn = mat->A; 744 PetscErrorCode ierr; 745 PetscLogDouble isend[5],irecv[5]; 746 747 PetscFunctionBegin; 748 info->block_size = 1.0; 749 750 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 751 752 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 753 isend[3] = info->memory; isend[4] = info->mallocs; 754 if (flag == MAT_LOCAL) { 755 info->nz_used = isend[0]; 756 info->nz_allocated = isend[1]; 757 info->nz_unneeded = isend[2]; 758 info->memory = isend[3]; 759 info->mallocs = isend[4]; 760 } else if (flag == MAT_GLOBAL_MAX) { 761 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 762 763 info->nz_used = irecv[0]; 764 info->nz_allocated = irecv[1]; 765 info->nz_unneeded = irecv[2]; 766 info->memory = irecv[3]; 767 info->mallocs = irecv[4]; 768 } else if (flag == MAT_GLOBAL_SUM) { 769 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 770 771 info->nz_used = irecv[0]; 772 info->nz_allocated = irecv[1]; 773 info->nz_unneeded = irecv[2]; 774 info->memory = irecv[3]; 775 info->mallocs = irecv[4]; 776 } 777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 778 info->fill_ratio_needed = 0; 779 info->factor_mallocs = 0; 780 PetscFunctionReturn(0); 781 } 782 783 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 784 { 785 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 786 PetscErrorCode ierr; 787 788 PetscFunctionBegin; 789 switch (op) { 790 case MAT_NEW_NONZERO_LOCATIONS: 791 case MAT_NEW_NONZERO_LOCATION_ERR: 792 case MAT_NEW_NONZERO_ALLOCATION_ERR: 793 MatCheckPreallocated(A,1); 794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 795 break; 796 case MAT_ROW_ORIENTED: 797 MatCheckPreallocated(A,1); 798 a->roworiented = flg; 799 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 800 break; 801 case MAT_FORCE_DIAGONAL_ENTRIES: 802 case 
MAT_KEEP_NONZERO_PATTERN: 803 case MAT_USE_HASH_TABLE: 804 case MAT_SORTED_FULL: 805 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 806 break; 807 case MAT_IGNORE_OFF_PROC_ENTRIES: 808 a->donotstash = flg; 809 break; 810 case MAT_SYMMETRIC: 811 case MAT_STRUCTURALLY_SYMMETRIC: 812 case MAT_HERMITIAN: 813 case MAT_SYMMETRY_ETERNAL: 814 case MAT_IGNORE_LOWER_TRIANGULAR: 815 case MAT_IGNORE_ZERO_ENTRIES: 816 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 817 break; 818 default: 819 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 820 } 821 PetscFunctionReturn(0); 822 } 823 824 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 825 { 826 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 827 const PetscScalar *l; 828 PetscScalar x,*v,*vv,*r; 829 PetscErrorCode ierr; 830 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 831 832 PetscFunctionBegin; 833 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 834 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 835 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 836 if (ll) { 837 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 838 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 839 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 840 for (i=0; i<m; i++) { 841 x = l[i]; 842 v = vv + i; 843 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 844 } 845 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 846 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 847 } 848 if (rr) { 849 const PetscScalar *ar; 850 851 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 852 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 853 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 854 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 855 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr); 856 
ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr); 857 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 858 for (i=0; i<n; i++) { 859 x = r[i]; 860 v = vv + i*lda; 861 for (j=0; j<m; j++) (*v++) *= x; 862 } 863 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 864 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 865 } 866 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 871 { 872 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 873 PetscErrorCode ierr; 874 PetscInt i,j; 875 PetscMPIInt size; 876 PetscReal sum = 0.0; 877 const PetscScalar *av,*v; 878 879 PetscFunctionBegin; 880 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 881 v = av; 882 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 883 if (size == 1) { 884 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 885 } else { 886 if (type == NORM_FROBENIUS) { 887 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 888 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 889 } 890 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 891 *nrm = PetscSqrtReal(*nrm); 892 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 893 } else if (type == NORM_1) { 894 PetscReal *tmp,*tmp2; 895 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 896 *nrm = 0.0; 897 v = av; 898 for (j=0; j<mdn->A->cmap->n; j++) { 899 for (i=0; i<mdn->A->rmap->n; i++) { 900 tmp[j] += PetscAbsScalar(*v); v++; 901 } 902 } 903 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 904 for (j=0; j<A->cmap->N; j++) { 905 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 906 } 907 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 908 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 909 } else if (type == NORM_INFINITY) { /* max row norm */ 910 PetscReal ntemp; 911 ierr 
= MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 912 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 913 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 914 } 915 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 920 { 921 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 922 Mat B; 923 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 924 PetscErrorCode ierr; 925 PetscInt j,i,lda; 926 PetscScalar *v; 927 928 PetscFunctionBegin; 929 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 930 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 931 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 932 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 933 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 934 } else B = *matout; 935 936 m = a->A->rmap->n; n = a->A->cmap->n; 937 ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 938 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 939 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 940 for (i=0; i<m; i++) rwork[i] = rstart + i; 941 for (j=0; j<n; j++) { 942 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 943 v += lda; 944 } 945 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 946 ierr = PetscFree(rwork);CHKERRQ(ierr); 947 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 948 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 949 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 950 *matout = B; 951 } else { 952 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 953 } 954 PetscFunctionReturn(0); 955 } 956 957 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 958 PETSC_INTERN PetscErrorCode 
MatScale_MPIDense(Mat,PetscScalar); 959 960 PetscErrorCode MatSetUp_MPIDense(Mat A) 961 { 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 966 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 967 if (!A->preallocated) { 968 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 969 } 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 974 { 975 PetscErrorCode ierr; 976 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 977 978 PetscFunctionBegin; 979 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatConjugate_MPIDense(Mat mat) 984 { 985 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = MatConjugate(a->A);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 PetscErrorCode MatRealPart_MPIDense(Mat A) 994 { 995 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 996 PetscErrorCode ierr; 997 998 PetscFunctionBegin; 999 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1004 { 1005 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 1014 { 1015 PetscErrorCode ierr; 1016 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1017 1018 PetscFunctionBegin; 1019 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 1020 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 1021 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 PETSC_INTERN PetscErrorCode 
MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1026 1027 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1028 { 1029 PetscErrorCode ierr; 1030 PetscInt i,n; 1031 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1032 PetscReal *work; 1033 1034 PetscFunctionBegin; 1035 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1036 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1037 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1038 if (type == NORM_2) { 1039 for (i=0; i<n; i++) work[i] *= work[i]; 1040 } 1041 if (type == NORM_INFINITY) { 1042 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1043 } else { 1044 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1045 } 1046 ierr = PetscFree(work);CHKERRQ(ierr); 1047 if (type == NORM_2) { 1048 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1049 } 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #if defined(PETSC_HAVE_CUDA) 1054 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1055 { 1056 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1057 PetscErrorCode ierr; 1058 PetscInt lda; 1059 1060 PetscFunctionBegin; 1061 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1062 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1063 if (!a->cvec) { 1064 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1065 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1066 } 1067 a->vecinuse = col + 1; 1068 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1069 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1070 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1071 *v = a->cvec; 1072 
PetscFunctionReturn(0); 1073 } 1074 1075 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1076 { 1077 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1078 PetscErrorCode ierr; 1079 1080 PetscFunctionBegin; 1081 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1082 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1083 a->vecinuse = 0; 1084 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1085 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1086 *v = NULL; 1087 PetscFunctionReturn(0); 1088 } 1089 1090 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1091 { 1092 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1093 PetscInt lda; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1098 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1099 if (!a->cvec) { 1100 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1101 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1102 } 1103 a->vecinuse = col + 1; 1104 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1105 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1106 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1107 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1108 *v = a->cvec; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1113 { 1114 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 if (!a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1119 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1120 a->vecinuse = 0; 1121 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1122 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1123 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1124 *v = NULL; 1125 PetscFunctionReturn(0); 1126 } 1127 1128 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1129 { 1130 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1131 PetscErrorCode ierr; 1132 PetscInt lda; 1133 1134 PetscFunctionBegin; 1135 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1136 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1137 if (!a->cvec) { 1138 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1139 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1140 } 1141 a->vecinuse = col + 1; 1142 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1143 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1144 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1145 *v = a->cvec; 1146 PetscFunctionReturn(0); 1147 } 1148 1149 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1150 { 1151 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1156 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1157 a->vecinuse = 0; 1158 ierr = 
MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1159 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1160 *v = NULL; 1161 PetscFunctionReturn(0); 1162 } 1163 1164 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1165 { 1166 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1171 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1172 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1177 { 1178 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1179 PetscErrorCode ierr; 1180 1181 PetscFunctionBegin; 1182 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1183 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1184 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1189 { 1190 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1191 PetscErrorCode ierr; 1192 1193 PetscFunctionBegin; 1194 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1195 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1196 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1201 { 1202 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1203 
PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1211 { 1212 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1213 PetscErrorCode ierr; 1214 1215 PetscFunctionBegin; 1216 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1221 { 1222 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1231 { 1232 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1241 { 1242 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1243 PetscErrorCode ierr; 1244 1245 PetscFunctionBegin; 1246 ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1251 { 1252 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1261 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1262 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1263 static PetscErrorCode 
MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1264 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1265 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1266 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1267 1268 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1269 { 1270 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1275 if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1276 if (d->A) { 1277 ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1278 } 1279 mat->boundtocpu = bind; 1280 if (!bind) { 1281 PetscBool iscuda; 1282 1283 ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1284 if (!iscuda) { 1285 ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1286 } 1287 ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1288 if (!iscuda) { 1289 ierr = MatDestroy(&d->cmat);CHKERRQ(ierr); 1290 } 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1296 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1297 } else { 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1304 } 1305 if (d->cmat) { 1306 ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr); 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1312 { 1313 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1314 PetscErrorCode ierr; 1315 PetscBool iscuda; 1316 1317 PetscFunctionBegin; 1318 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1319 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1320 if (!iscuda) PetscFunctionReturn(0); 1321 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1322 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1323 if (!d->A) { 1324 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1325 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1326 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1327 } 1328 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1329 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1330 
A->preallocated = PETSC_TRUE; 1331 PetscFunctionReturn(0); 1332 } 1333 #endif 1334 1335 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1336 { 1337 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1338 PetscErrorCode ierr; 1339 1340 PetscFunctionBegin; 1341 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1346 { 1347 PetscFunctionBegin; 1348 *missing = PETSC_FALSE; 1349 PetscFunctionReturn(0); 1350 } 1351 1352 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1353 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1354 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1355 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1356 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1357 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1358 1359 /* -------------------------------------------------------------------*/ 1360 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1361 MatGetRow_MPIDense, 1362 MatRestoreRow_MPIDense, 1363 MatMult_MPIDense, 1364 /* 4*/ MatMultAdd_MPIDense, 1365 MatMultTranspose_MPIDense, 1366 MatMultTransposeAdd_MPIDense, 1367 NULL, 1368 NULL, 1369 NULL, 1370 /* 10*/ NULL, 1371 NULL, 1372 NULL, 1373 NULL, 1374 MatTranspose_MPIDense, 1375 /* 15*/ MatGetInfo_MPIDense, 1376 MatEqual_MPIDense, 1377 MatGetDiagonal_MPIDense, 1378 MatDiagonalScale_MPIDense, 1379 MatNorm_MPIDense, 1380 /* 20*/ MatAssemblyBegin_MPIDense, 1381 MatAssemblyEnd_MPIDense, 1382 MatSetOption_MPIDense, 1383 MatZeroEntries_MPIDense, 1384 /* 24*/ MatZeroRows_MPIDense, 1385 NULL, 1386 NULL, 1387 NULL, 1388 NULL, 1389 /* 29*/ MatSetUp_MPIDense, 1390 NULL, 1391 NULL, 1392 MatGetDiagonalBlock_MPIDense, 1393 NULL, 1394 /* 34*/ MatDuplicate_MPIDense, 1395 NULL, 1396 NULL, 
1397 NULL, 1398 NULL, 1399 /* 39*/ MatAXPY_MPIDense, 1400 MatCreateSubMatrices_MPIDense, 1401 NULL, 1402 MatGetValues_MPIDense, 1403 MatCopy_MPIDense, 1404 /* 44*/ NULL, 1405 MatScale_MPIDense, 1406 MatShift_Basic, 1407 NULL, 1408 NULL, 1409 /* 49*/ MatSetRandom_MPIDense, 1410 NULL, 1411 NULL, 1412 NULL, 1413 NULL, 1414 /* 54*/ NULL, 1415 NULL, 1416 NULL, 1417 NULL, 1418 NULL, 1419 /* 59*/ MatCreateSubMatrix_MPIDense, 1420 MatDestroy_MPIDense, 1421 MatView_MPIDense, 1422 NULL, 1423 NULL, 1424 /* 64*/ NULL, 1425 NULL, 1426 NULL, 1427 NULL, 1428 NULL, 1429 /* 69*/ NULL, 1430 NULL, 1431 NULL, 1432 NULL, 1433 NULL, 1434 /* 74*/ NULL, 1435 NULL, 1436 NULL, 1437 NULL, 1438 NULL, 1439 /* 79*/ NULL, 1440 NULL, 1441 NULL, 1442 NULL, 1443 /* 83*/ MatLoad_MPIDense, 1444 NULL, 1445 NULL, 1446 NULL, 1447 NULL, 1448 NULL, 1449 /* 89*/ NULL, 1450 NULL, 1451 NULL, 1452 NULL, 1453 NULL, 1454 /* 94*/ NULL, 1455 NULL, 1456 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1457 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1458 NULL, 1459 /* 99*/ MatProductSetFromOptions_MPIDense, 1460 NULL, 1461 NULL, 1462 MatConjugate_MPIDense, 1463 NULL, 1464 /*104*/ NULL, 1465 MatRealPart_MPIDense, 1466 MatImaginaryPart_MPIDense, 1467 NULL, 1468 NULL, 1469 /*109*/ NULL, 1470 NULL, 1471 NULL, 1472 MatGetColumnVector_MPIDense, 1473 MatMissingDiagonal_MPIDense, 1474 /*114*/ NULL, 1475 NULL, 1476 NULL, 1477 NULL, 1478 NULL, 1479 /*119*/ NULL, 1480 NULL, 1481 NULL, 1482 NULL, 1483 NULL, 1484 /*124*/ NULL, 1485 MatGetColumnNorms_MPIDense, 1486 NULL, 1487 NULL, 1488 NULL, 1489 /*129*/ NULL, 1490 NULL, 1491 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1492 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1493 NULL, 1494 /*134*/ NULL, 1495 NULL, 1496 NULL, 1497 NULL, 1498 NULL, 1499 /*139*/ NULL, 1500 NULL, 1501 NULL, 1502 NULL, 1503 NULL, 1504 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1505 /*145*/ NULL, 1506 NULL, 1507 NULL 1508 }; 1509 1510 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat 
mat,PetscScalar *data) 1511 { 1512 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1513 PetscBool iscuda; 1514 PetscErrorCode ierr; 1515 1516 PetscFunctionBegin; 1517 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1518 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1519 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1520 if (!a->A) { 1521 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1522 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1523 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1524 } 1525 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1526 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1527 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1528 mat->preallocated = PETSC_TRUE; 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1533 { 1534 Mat B,C; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr); 1539 ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1540 ierr = MatDestroy(&C);CHKERRQ(ierr); 1541 if (reuse == MAT_REUSE_MATRIX) { 1542 C = *newmat; 1543 } else C = NULL; 1544 ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1545 ierr = MatDestroy(&B);CHKERRQ(ierr); 1546 if (reuse == MAT_INPLACE_MATRIX) { 1547 ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr); 1548 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1549 PetscFunctionReturn(0); 1550 } 1551 1552 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1553 { 1554 Mat B,C; 1555 PetscErrorCode ierr; 1556 1557 
PetscFunctionBegin; 1558 ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr); 1559 ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1560 if (reuse == MAT_REUSE_MATRIX) { 1561 C = *newmat; 1562 } else C = NULL; 1563 ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1564 ierr = MatDestroy(&B);CHKERRQ(ierr); 1565 if (reuse == MAT_INPLACE_MATRIX) { 1566 ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr); 1567 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #if defined(PETSC_HAVE_ELEMENTAL) 1572 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1573 { 1574 Mat mat_elemental; 1575 PetscErrorCode ierr; 1576 PetscScalar *v; 1577 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1578 1579 PetscFunctionBegin; 1580 if (reuse == MAT_REUSE_MATRIX) { 1581 mat_elemental = *newmat; 1582 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1583 } else { 1584 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1585 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1586 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1587 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1588 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1589 } 1590 1591 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1592 for (i=0; i<N; i++) cols[i] = i; 1593 for (i=0; i<m; i++) rows[i] = rstart + i; 1594 1595 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1596 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1597 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1598 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1599 ierr = 
MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1600 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1601 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1602 1603 if (reuse == MAT_INPLACE_MATRIX) { 1604 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1605 } else { 1606 *newmat = mat_elemental; 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 #endif 1611 1612 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1613 { 1614 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1623 { 1624 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1625 PetscErrorCode ierr; 1626 1627 PetscFunctionBegin; 1628 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1633 { 1634 PetscErrorCode ierr; 1635 Mat_MPIDense *mat; 1636 PetscInt m,nloc,N; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1640 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1641 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1642 PetscInt sum; 1643 1644 if (n == PETSC_DECIDE) { 1645 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1646 } 1647 /* Check sum(n) = N */ 1648 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1649 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1650 1651 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1652 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1653 } 1654 1655 /* numeric phase */ 1656 mat = (Mat_MPIDense*)(*outmat)->data; 1657 ierr 
= MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1658 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #if defined(PETSC_HAVE_CUDA) 1664 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1665 { 1666 Mat B; 1667 Mat_MPIDense *m; 1668 PetscErrorCode ierr; 1669 1670 PetscFunctionBegin; 1671 if (reuse == MAT_INITIAL_MATRIX) { 1672 ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1673 } else if (reuse == MAT_REUSE_MATRIX) { 1674 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1675 } 1676 1677 B = *newmat; 1678 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1679 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1680 ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1681 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr); 1685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1690 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local block and rebuild the scatter used for MatMult */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Converts a MATMPIDENSE matrix (in-place, reuse, or fresh copy) to MATMPIDENSECUDA:
   switches the default vector type to CUDA, registers the CUDA array-access and
   product methods, and converts the local block to MATSEQDENSECUDA */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                 MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",     MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",     MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                             MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                         MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                        MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                         MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                     MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                    MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                           MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                           MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                         MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    /* values were just copied from the CPU, so host and device are in sync */
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask
= PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Exposes column col of the local block as a parallel Vec (read-write access).
   Only one column vector may be outstanding at a time; vecinuse stores col+1 so 0 means "not in use" */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the wrapper vector; its array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* columns are stored contiguously with leading dimension lda */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column vector obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); additionally read-locks the vector */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column vector obtained with MatDenseGetColumnVecRead_MPIDense().
   NOTE(review): the error message says "MatDenseGetColumnVec()" although this is the Read pair — confirm intended wording */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr =
VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column vector obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Exposes columns [cbegin,cend) as a parallel dense submatrix backed by the same storage.
   The wrapper matrix a->cmat is created lazily and resized when the requested width changes;
   matinuse stores cbegin+1 so 0 means "not in use" */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    setup = PETSC_TRUE;
    ierr  = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr  = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr  = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* same row layout as A, column layout sized to the requested window */
    ierr  = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr  = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr  = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* width changed since last use: rebuild the column layout */
    setup = PETSC_TRUE;
    ierr  = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr  = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr  = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr  = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse = cbegin + 1;
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Releases the submatrix obtained with MatDenseGetSubMatrix_MPIDense() */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
.
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateDense()

M*/
/* Type constructor for MATMPIDENSE: allocates the Mat_MPIDense data, installs the ops table,
   creates the stash used for off-process MatSetValues(), and registers all composed methods */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

   Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: builds a plain MPI dense matrix, then converts it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
.  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation if the matrix type registered one */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array lives on the device, so the GPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the caller wrote through the device pointer, so the GPU copy is now authoritative */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: no PetscObjectStateIncrease() here */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* NOTE(review): unlike its siblings this routine does not call PetscValidHeaderSpecific(A,...) — confirm whether intentional */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.
   M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* having data is decided collectively: if any rank supplied an array the matrix
       is considered already filled and is marked assembled below */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   Unlike MatCreateDense(), providing or omitting data must be done uniformly on all
   ranks of comm (enforced with PetscValidLogicalCollectiveBool() below).

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicate an MPI dense matrix: the layouts of A are referenced (not copied) and the
   local dense block is duplicated with the requested copy option */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share A's layouts instead of recomputing them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPI dense matrix from a binary viewer or, when PETSc was built with HDF5, an HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Two MPI dense matrices are equal iff their local blocks are equal on every rank */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free the reuse data attached to a C = A^T*B product of two MPI dense matrices */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free the reuse data attached to a C = A*B^T product of two MPI dense matrices */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T*B: local products are summed across ranks with MPI_Reduce_scatter() */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data)
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, its rows of every column, contiguously */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T*B: sizes C and allocates the communication buffers kept for reuse */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart ||
A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* the (size_t) casts perform the cM*cN product in size_t arithmetic */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A*B^T: pick the algorithm ("allgatherv" or "cyclic") and allocate its buffers */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ?
0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers, each sized for the largest row block of B on any rank */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the packed local B^T, buf[1] receives the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A*B^T, cyclic algorithm: row blocks of B travel around a ring of ranks,
   each visit contributing one column block of the local C via a GEMM */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  /* fetch leading dimensions of the three local arrays for the BLAS calls */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* if the local B is contiguous (lda == local row count) it can be sent as-is, otherwise pack it */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
/* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2832 sendsiz = cK * bn; 2833 recvsiz = cK * nextbn; 2834 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2835 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr); 2836 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr); 2837 } 2838 2839 /* local aseq * sendbuf^T */ 2840 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2841 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2842 2843 if (nextrecvisfrom != rank) { 2844 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2845 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 2846 } 2847 bn = nextbn; 2848 recvisfrom = nextrecvisfrom; 2849 sendbuf = recvbuf; 2850 } 2851 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2852 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2853 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2854 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2860 { 2861 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2862 Mat_MatTransMultDense *abt; 2863 PetscErrorCode ierr; 2864 MPI_Comm comm; 2865 PetscMPIInt size; 2866 PetscScalar *cv, *sendbuf, *recvbuf; 2867 const PetscScalar *av,*bv; 2868 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2869 PetscScalar _DOne=1.0,_DZero=0.0; 2870 PetscBLASInt cm, cn, ck, alda, clda; 2871 2872 PetscFunctionBegin; 2873 MatCheckProduct(C,3); 2874 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product 
data empty"); 2875 abt = (Mat_MatTransMultDense*)C->product->data; 2876 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2877 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2878 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2879 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2880 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2881 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2882 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2883 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2884 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2885 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2886 /* copy transpose of B into buf[0] */ 2887 bn = B->rmap->n; 2888 sendbuf = abt->buf[0]; 2889 recvbuf = abt->buf[1]; 2890 for (k = 0, j = 0; j < bn; j++) { 2891 for (i = 0; i < cK; i++, k++) { 2892 sendbuf[k] = bv[i * blda + j]; 2893 } 2894 } 2895 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2896 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr); 2897 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2898 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2899 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2900 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2901 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2902 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2903 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2904 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 PetscFunctionReturn(0); 2907 } 2908 2909 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2910 { 2911 Mat_MatTransMultDense *abt; 2912 PetscErrorCode ierr; 2913 2914 PetscFunctionBegin; 2915 MatCheckProduct(C,3); 2916 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

/* Free the Elemental-based reuse data attached to a C = A*B product of two MPI dense matrices */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B: convert both operands to Elemental, multiply there, convert back */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A*B via Elemental: create the Elemental mirrors Ae, Be, Ce kept for reuse */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A
and B */ 2969 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2970 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2971 } 2972 2973 /* create elemental matrices Ae and Be */ 2974 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2975 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2976 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2977 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2978 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2979 2980 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2981 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2982 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2983 ierr = MatSetUp(Be);CHKERRQ(ierr); 2984 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2985 2986 /* compute symbolic Ce = Ae*Be */ 2987 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2988 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2989 2990 /* setup C */ 2991 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2992 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2993 ierr = MatSetUp(C);CHKERRQ(ierr); 2994 2995 /* create data structure for reuse Cdense */ 2996 ierr = PetscNew(&ab);CHKERRQ(ierr); 2997 ab->Ae = Ae; 2998 ab->Be = Be; 2999 ab->Ce = Ce; 3000 3001 C->product->data = ab; 3002 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 3003 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 3004 PetscFunctionReturn(0); 3005 } 3006 #endif 3007 /* ----------------------------------------------- */ 3008 #if defined(PETSC_HAVE_ELEMENTAL) 3009 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 3010 { 3011 PetscFunctionBegin; 3012 
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Hook up the AtB (C = A^T*B) product implementation; requires A and B to have matching row layouts */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

/* Hook up the ABt (C = A*B^T) product implementation and process the algorithm runtime option */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on which API the user called */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Dispatch MatProductSetFromOptions() to the per-product-type hook; unsupported types fall through */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}