/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  /* Base-type compare so derived types (e.g. the CUDA variants) also match */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;  /* MPI case: hand back the per-process sequential block */
  else {
    ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;  /* sequential case: the matrix is its own local matrix */
  }
  PetscFunctionReturn(0);
}

/* Copy the local dense block of A into the local dense block of B (layouts assumed compatible). */
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Get one (locally owned) global row by delegating to the inner sequential matrix
   with a shifted local row index; pair with MatRestoreRow_MPIDense(). */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;  /* global -> local row index */
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release the row storage obtained with MatGetRow_MPIDense() (same global->local shift). */
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return the on-process diagonal block as a sequential dense matrix that shares
   storage with the local block. The result is cached on A under "DiagonalBlock";
   A keeps the only reference (the local B handle is destroyed after composing). */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* point B at the diagonal block inside the local (column-major) array; no copy */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr); /* composition above holds the surviving reference */
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Insert/add values: locally owned rows go straight into the inner sequential matrix;
   off-process rows are stashed for communication at assembly time (unless donotstash). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;  /* negative rows are silently skipped by convention */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: entry (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;  /* off-process entry invalidates assembled state */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Retrieve values; only locally owned rows are supported (no communication). */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Leading dimension of the local dense storage (delegates to the inner matrix). */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Set the leading dimension; lazily creates the inner sequential matrix if it
   does not exist yet (choosing the CUDA sequential type when A is MATMPIDENSECUDA). */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    /* inner matrix holds all global columns for the locally owned rows */
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-write access to the local array; refused while a sub-matrix view is outstanding. */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-only access to the local array; same in-use guard as above. */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Write-only access to the local array; same in-use guard as above. */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Temporarily replace the local storage with a caller-supplied array (see MatDensePlaceArray). */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Undo MatDensePlaceArray: restore the matrix's own storage. */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Permanently replace the local storage (ownership transfers to the matrix). */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract a sub-matrix. The column IS is gathered onto every process; the row IS
   must select only locally owned rows with the same row partitioning as A
   (no parallel redistribution is performed). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];  /* start of source column icol[i] (column-major) */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

/* Return the array obtained with MatDenseGetArray (delegates to the inner matrix). */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return the array obtained with MatDenseGetArrayRead. */
PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return the array obtained with MatDenseGetArrayWrite. */
PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Start assembly: launch the scatter of stashed off-process entries. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash messages, inserting runs of values that share a
   row with a single MatSetValues call, then assemble the inner sequential matrix. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly sets up the scatter used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zero all entries of the local block. */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Zero the given (global) rows; optionally fix up b = diag*x for those rows and
   then set the diagonal entries to diag via MatDiagonalSet. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

/* y = A*x: broadcast the full x into the local work vector lvec via the SF
   (memtype-aware, so it also works for GPU-resident vectors), then multiply locally. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x: same communication pattern as MatMult, local multadd at the end. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A^T*x: local transpose-multiply into lvec, then reduce (sum) into y via the SF. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);  /* reduce accumulates, so start from zero */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A^T*x: copy y into z, then reduce the local transpose product into z. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract the (global) diagonal: each process reads the diagonal of its own block
   directly from the column-major local array. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  /* NOTE(review): radd is computed with m but the stride below uses lda; when
     lda != m this offset looks wrong (expected rstart*lda) -- confirm upstream */
  radd = A->rmap->rstart*m;
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy the matrix: free the inner matrix, communication objects and cached
   column vector/matrix, then detach every composed method so the object is clean. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
/* View via ASCII/draw/socket viewers. INFO formats print per-process stats and return;
   otherwise the whole matrix is gathered onto rank 0 (expensive) and viewed there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;  /* NOTE(review): fetched below but apparently unused -- confirm */
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (rank == 0) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (rank == 0) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatch viewing to the ASCII/draw/socket path or the binary path. */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Gather MatInfo statistics: local values straight through, or max/sum reductions
   of the five PetscLogDouble fields across the communicator. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Handle matrix options: a few are forwarded to the inner matrix, several are
   accepted-and-ignored for dense storage, unknown options are errors. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;  /* also affects how MatSetValues interprets its value array */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* Scale rows by ll and/or columns by rr. Row scaling is purely local; column
   scaling first broadcasts rr into lvec since each process stores all columns. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;  /* walk across row i with stride lda (column-major storage) */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr); 857 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 858 for (i=0; i<n; i++) { 859 x = r[i]; 860 v = vv + i*lda; 861 for (j=0; j<m; j++) (*v++) *= x; 862 } 863 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 864 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 865 } 866 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 871 { 872 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 873 PetscErrorCode ierr; 874 PetscInt i,j; 875 PetscMPIInt size; 876 PetscReal sum = 0.0; 877 const PetscScalar *av,*v; 878 879 PetscFunctionBegin; 880 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 881 v = av; 882 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 883 if (size == 1) { 884 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 885 } else { 886 if (type == NORM_FROBENIUS) { 887 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 888 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 889 } 890 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 891 *nrm = PetscSqrtReal(*nrm); 892 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 893 } else if (type == NORM_1) { 894 PetscReal *tmp,*tmp2; 895 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 896 *nrm = 0.0; 897 v = av; 898 for (j=0; j<mdn->A->cmap->n; j++) { 899 for (i=0; i<mdn->A->rmap->n; i++) { 900 tmp[j] += PetscAbsScalar(*v); v++; 901 } 902 } 903 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 904 for (j=0; j<A->cmap->N; j++) { 905 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 906 } 907 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 908 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 909 } else if (type == NORM_INFINITY) { /* max row norm */ 910 PetscReal ntemp; 911 
      /* rows are local, so the max row norm is the max over per-process row norms */
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIDense - Forms the transpose by inserting each local column
   of A as a row of B with MatSetValues (communication happens at assembly).
   For MAT_INPLACE_MATRIX the new matrix is merged back into A's header.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a  = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* transposed sizes: local column count becomes the local row count */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;   /* global row indices of the local block */
  for (j=0; j<n; j++) {
    /* local column j of A becomes (part of) global row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode
MatScale_MPIDense(Mat,PetscScalar);

/* Sets up layouts and performs default preallocation if none was done */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y = Y + alpha*X, delegated to the local sequential blocks */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Complex-conjugates the entries of the local block */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keeps only the real part of the entries of the local block */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keeps only the imaginary part of the entries of the local block */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts global column col into v via the local block's getcolumnvector op */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode
MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

/*
   MatGetColumnReductions_MPIDense - Per-column reductions (norms, sums, means)
   computed locally then combined across the communicator: max for the infinity
   norm, sum otherwise.  NORM_2 squares local results before the reduce and
   square-roots after; the MEAN variants reduce the SUM and divide by the
   global row count m.
*/
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  if (type == REDUCTION_MEAN_REALPART) {
    ierr = MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work);CHKERRQ(ierr);
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    ierr = MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work);CHKERRQ(ierr);
  } else {
    ierr = MatGetColumnReductions_SeqDense(a->A,type,work);CHKERRQ(ierr);
  }
  if (type == NORM_2) {
    /* local 2-norms must be squared so the sum-reduce is over squared norms */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Exposes column col of the local CUDA block as an MPI CUDA vector (lazily created) */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr
         = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;   /* record the column (offset by 1 so 0 means "not in use") */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* point the cached vector at column col of the column-major device array */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column vector obtained with MatDenseGetColumnVec_MPIDenseCUDA */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same as GetColumnVec but uses the read accessor and locks the vector */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a read-only column vector and pops the read lock */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: uses the write accessor for the device array */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a write-only column vector obtained with the write variant above */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Points the local block at a user-supplied device array (must be reset later) */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restores the device array replaced by MatDenseCUDAPlaceArray */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Permanently replaces the local block's device array with a user-supplied one */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The following thin wrappers forward device-array access to the local sequential block */

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)
                     A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/*
   MatBindToCPU_MPIDenseCUDA - Pins the matrix to host (bind) or allows device
   execution (!bind).  Propagates the setting to the local block and cached
   matrix, destroys cached vector/matrix of the wrong type, and swaps the
   composed column-vector accessors between CUDA and host implementations.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached host-type objects; they will be recreated as CUDA objects on demand */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocates the local MATSEQDENSECUDA block, optionally wrapping user device data */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr =
         PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local block stores all N global columns of the local rows */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fills the local block with random values */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Dense storage always has every diagonal entry, so nothing is ever missing */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE; slot numbers follow struct _MatOps ordering */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};

/* Creates (if needed) and preallocates the local sequential block; the local
   block is rmap->n x cmap->N, i.e. it holds all global columns of the local rows */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ?
                              MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*
   MatConvert_MPIAIJ_MPIDense - Converts a parallel AIJ matrix to parallel dense:
   merge the local AIJ parts, densify sequentially, then concatenate the
   sequential pieces into the parallel result.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

/*
   MatConvert_MPIDense_MPIAIJ - Converts parallel dense to parallel AIJ by
   sparsifying the local block and concatenating the sequential results.
*/
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr);
  ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Converts parallel dense to Elemental format by re-inserting all local entries */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat
                 mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Direct access to a column of the local block (host array) */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Releases access obtained with MatDenseGetColumn_MPIDense */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCreateMPIMatConcatenateSeqMat_MPIDense - Builds (or refills) a parallel
   dense matrix whose local block on each process is a copy of that process's
   sequential matrix inmat; n is the local column count (PETSC_DECIDE allowed).
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - Converts a CUDA parallel dense matrix
   back to the host type: binds to CPU, restores the host vector type and type
   name, and removes the CUDA-specific composed functions.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr =
PetscFree(B->defaultvectype);CHKERRQ(ierr); 1688 ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1689 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1692 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr); 1693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1694 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1695 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1696 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1697 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1698 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1699 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1700 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1701 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1702 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1703 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr); 1704 m = (Mat_MPIDense*)(B)->data; 1705 if (m->A) { 1706 ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1707 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1708 } 1709 B->ops->bindtocpu = NULL; 1710 
B->offloadmask = PETSC_OFFLOAD_CPU; 1711 PetscFunctionReturn(0); 1712 } 1713 1714 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1715 { 1716 Mat B; 1717 Mat_MPIDense *m; 1718 PetscErrorCode ierr; 1719 1720 PetscFunctionBegin; 1721 if (reuse == MAT_INITIAL_MATRIX) { 1722 ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1723 } else if (reuse == MAT_REUSE_MATRIX) { 1724 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1725 } 1726 1727 B = *newmat; 1728 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1729 ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1730 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1731 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1732 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1733 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1734 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1735 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1736 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1737 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1738 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1739 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1745 m = (Mat_MPIDense*)(B->data); 1746 if (m->A) { 1747 ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1748 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1749 B->offloadmask = PETSC_OFFLOAD_BOTH; 1750 } else { 1751 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1752 } 1753 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1754 1755 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1756 PetscFunctionReturn(0); 1757 } 1758 #endif 1759 1760 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1761 { 1762 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1763 PetscErrorCode ierr; 1764 PetscInt lda; 1765 1766 PetscFunctionBegin; 1767 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1768 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1769 if (!a->cvec) { 1770 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1771 } 1772 a->vecinuse = col + 1; 1773 ierr = 
MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* point the wrapper at the first entry of column col (column-major storage, leading dimension lda) */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Return the column Vec obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); the returned Vec is read-locked. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); /* forbid writes through the wrapper */
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Return the column Vec obtained with MatDenseGetColumnVecRead_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense() (uses MatDenseGetArrayWrite() underneath). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Return the column Vec obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Expose columns [cbegin,cend) of A as a matrix of the same (MPI) type; must be matched
   by MatDenseRestoreSubMatrix_MPIDense(). Only one submatrix may be checked out at a time. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) { /* lazily create the wrapper matrix sharing A's row layout */
    setup = PETSC_TRUE;
    ierr  = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr  = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr  = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr  = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr  = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr  = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) { /* column count changed since last use: rebuild the column layout */
    setup = PETSC_TRUE;
    ierr  = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr  = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr  = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr  = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this?
*/
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* store cbegin+1 so 0 means "no submatrix checked out" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Return the submatrix obtained with MatDenseGetSubMatrix_MPIDense() */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense()

M*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* register the dense-specific methods looked up by the MatDense* API wrappers */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
.
-mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: build a host MPI dense matrix, then convert it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr); /* ensure a CUDA device is usable before committing to the type */
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
.  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the user-provided array lives on the host */
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the replacement array lives on the host */
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the user-provided array lives on the device */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the replacement array lives on the device */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU; /* the device copy is now the up-to-date one */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU.
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2339 2340 Level: developer 2341 2342 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2343 @*/ 2344 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2345 { 2346 PetscErrorCode ierr; 2347 2348 PetscFunctionBegin; 2349 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2350 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2351 PetscFunctionReturn(0); 2352 } 2353 2354 /*@C 2355 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2356 2357 Not Collective 2358 2359 Input Parameters: 2360 + A - the matrix 2361 - array - the GPU array in column major order 2362 2363 Notes: 2364 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2365 2366 Level: developer 2367 2368 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2369 @*/ 2370 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2371 { 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 /*@C 2380 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2381 2382 Not Collective 2383 2384 Input Parameters: 2385 . A - the matrix 2386 2387 Output Parameters 2388 . array - the GPU array in column major order 2389 2390 Notes: 2391 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU; /* the device copy is now the up-to-date one */
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* collective agreement: if any rank supplied a data array, skip assembly everywhere */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr);
    if (havedata) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
PetscFunctionReturn(0); 2494 } 2495 2496 #if defined(PETSC_HAVE_CUDA) 2497 /*@C 2498 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2499 2500 Collective 2501 2502 Input Parameters: 2503 + comm - MPI communicator 2504 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2505 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2506 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2507 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2508 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2509 to control matrix memory allocation. 2510 2511 Output Parameter: 2512 . A - the matrix 2513 2514 Notes: 2515 2516 Level: intermediate 2517 2518 .seealso: MatCreate(), MatCreateDense() 2519 @*/ 2520 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2521 { 2522 PetscErrorCode ierr; 2523 PetscMPIInt size; 2524 2525 PetscFunctionBegin; 2526 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2527 PetscValidLogicalCollectiveBool(*A,!!data,6); 2528 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2529 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2530 if (size > 1) { 2531 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2532 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2533 if (data) { /* user provided data array, so no need to assemble */ 2534 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2535 (*A)->assembled = PETSC_TRUE; 2536 } 2537 } else { 2538 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2539 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2540 } 2541 PetscFunctionReturn(0); 2542 } 2543 #endif 2544 2545 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2546 { 2547 Mat mat; 2548 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2549 PetscErrorCode ierr; 2550 2551 
PetscFunctionBegin; 2552 *newmat = NULL; 2553 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2554 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2555 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2556 a = (Mat_MPIDense*)mat->data; 2557 2558 mat->factortype = A->factortype; 2559 mat->assembled = PETSC_TRUE; 2560 mat->preallocated = PETSC_TRUE; 2561 2562 mat->insertmode = NOT_SET_VALUES; 2563 a->donotstash = oldmat->donotstash; 2564 2565 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 2566 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 2567 2568 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2569 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2570 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2571 2572 *newmat = mat; 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2577 { 2578 PetscErrorCode ierr; 2579 PetscBool isbinary; 2580 #if defined(PETSC_HAVE_HDF5) 2581 PetscBool ishdf5; 2582 #endif 2583 2584 PetscFunctionBegin; 2585 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2586 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2587 /* force binary viewer to load .info file if it has not yet done so */ 2588 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2590 #if defined(PETSC_HAVE_HDF5) 2591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2592 #endif 2593 if (isbinary) { 2594 ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2595 #if defined(PETSC_HAVE_HDF5) 2596 } else if (ishdf5) { 2597 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2598 #endif 2599 } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s 
matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2604 { 2605 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2606 Mat a,b; 2607 PetscBool flg; 2608 PetscErrorCode ierr; 2609 2610 PetscFunctionBegin; 2611 a = matA->A; 2612 b = matB->A; 2613 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2614 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2615 PetscFunctionReturn(0); 2616 } 2617 2618 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2619 { 2620 PetscErrorCode ierr; 2621 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2622 2623 PetscFunctionBegin; 2624 ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2625 ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2626 ierr = PetscFree(atb);CHKERRQ(ierr); 2627 PetscFunctionReturn(0); 2628 } 2629 2630 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2631 { 2632 PetscErrorCode ierr; 2633 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2634 2635 PetscFunctionBegin; 2636 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2637 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2638 ierr = PetscFree(abt);CHKERRQ(ierr); 2639 PetscFunctionReturn(0); 2640 } 2641 2642 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2643 { 2644 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2645 Mat_TransMatMultDense *atb; 2646 PetscErrorCode ierr; 2647 MPI_Comm comm; 2648 PetscMPIInt size,*recvcounts; 2649 PetscScalar *carray,*sendbuf; 2650 const PetscScalar *atbarray; 2651 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2652 const PetscInt *ranges; 2653 2654 PetscFunctionBegin; 2655 MatCheckProduct(C,3); 2656 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2657 atb = (Mat_TransMatMultDense *)C->product->data; 2658 recvcounts = atb->recvcounts; 2659 sendbuf = atb->sendbuf; 2660 2661 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2662 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2663 2664 /* compute atbarray = aseq^T * bseq */ 2665 ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2666 2667 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2668 2669 /* arrange atbarray into sendbuf */ 2670 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2671 for (proc=0, k=0; proc<size; proc++) { 2672 for (j=0; j<cN; j++) { 2673 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2674 } 2675 } 2676 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2677 2678 /* sum all atbarray to local values of C */ 2679 ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 2680 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr); 2681 ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 2682 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2683 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2688 { 2689 PetscErrorCode ierr; 2690 MPI_Comm comm; 2691 PetscMPIInt size; 2692 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2693 Mat_TransMatMultDense *atb; 2694 PetscBool cisdense; 2695 PetscInt i; 2696 const PetscInt *ranges; 2697 2698 PetscFunctionBegin; 2699 MatCheckProduct(C,3); 2700 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2701 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2702 if (A->rmap->rstart != B->rmap->rstart || 
A->rmap->rend != B->rmap->rend) { 2703 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2704 } 2705 2706 /* create matrix product C */ 2707 ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2708 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 2709 if (!cisdense) { 2710 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2711 } 2712 ierr = MatSetUp(C);CHKERRQ(ierr); 2713 2714 /* create data structure for reuse C */ 2715 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2716 ierr = PetscNew(&atb);CHKERRQ(ierr); 2717 cM = C->rmap->N; 2718 ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2719 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2720 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2721 2722 C->product->data = atb; 2723 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2724 PetscFunctionReturn(0); 2725 } 2726 2727 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2728 { 2729 PetscErrorCode ierr; 2730 MPI_Comm comm; 2731 PetscMPIInt i, size; 2732 PetscInt maxRows, bufsiz; 2733 PetscMPIInt tag; 2734 PetscInt alg; 2735 Mat_MatTransMultDense *abt; 2736 Mat_Product *product = C->product; 2737 PetscBool flg; 2738 2739 PetscFunctionBegin; 2740 MatCheckProduct(C,4); 2741 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2742 /* check local size of A and B */ 2743 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2744 2745 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2746 alg = flg ? 
0 : 1; 2747 2748 /* setup matrix product C */ 2749 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2750 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2751 ierr = MatSetUp(C);CHKERRQ(ierr); 2752 ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 2753 2754 /* create data structure for reuse C */ 2755 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2756 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2757 ierr = PetscNew(&abt);CHKERRQ(ierr); 2758 abt->tag = tag; 2759 abt->alg = alg; 2760 switch (alg) { 2761 case 1: /* alg: "cyclic" */ 2762 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2763 bufsiz = A->cmap->N * maxRows; 2764 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2765 break; 2766 default: /* alg: "allgatherv" */ 2767 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2768 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2769 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2770 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2771 break; 2772 } 2773 2774 C->product->data = abt; 2775 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2776 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2777 PetscFunctionReturn(0); 2778 } 2779 2780 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2781 { 2782 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2783 Mat_MatTransMultDense *abt; 2784 PetscErrorCode ierr; 2785 MPI_Comm comm; 2786 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2787 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2788 PetscInt i,cK=A->cmap->N,k,j,bn; 2789 
PetscScalar _DOne=1.0,_DZero=0.0; 2790 const PetscScalar *av,*bv; 2791 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2792 MPI_Request reqs[2]; 2793 const PetscInt *ranges; 2794 2795 PetscFunctionBegin; 2796 MatCheckProduct(C,3); 2797 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2798 abt = (Mat_MatTransMultDense*)C->product->data; 2799 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2800 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 2801 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2802 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2803 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2804 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2805 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2806 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2807 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2808 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2809 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2810 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2811 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2812 bn = B->rmap->n; 2813 if (blda == bn) { 2814 sendbuf = (PetscScalar*)bv; 2815 } else { 2816 sendbuf = abt->buf[0]; 2817 for (k = 0, i = 0; i < cK; i++) { 2818 for (j = 0; j < bn; j++, k++) { 2819 sendbuf[k] = bv[i * blda + j]; 2820 } 2821 } 2822 } 2823 if (size > 1) { 2824 sendto = (rank + size - 1) % size; 2825 recvfrom = (rank + size + 1) % size; 2826 } else { 2827 sendto = recvfrom = 0; 2828 } 2829 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2830 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2831 recvisfrom = rank; 2832 for (i = 0; i < size; i++) { 2833 /* we have finished receiving in sending, bufs can be read/modified */ 2834 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2835 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2836 2837 if (nextrecvisfrom != rank) { 2838 
/* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2839 sendsiz = cK * bn; 2840 recvsiz = cK * nextbn; 2841 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2842 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr); 2843 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr); 2844 } 2845 2846 /* local aseq * sendbuf^T */ 2847 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2848 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2849 2850 if (nextrecvisfrom != rank) { 2851 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2852 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 2853 } 2854 bn = nextbn; 2855 recvisfrom = nextrecvisfrom; 2856 sendbuf = recvbuf; 2857 } 2858 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2859 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2860 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2861 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2867 { 2868 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2869 Mat_MatTransMultDense *abt; 2870 PetscErrorCode ierr; 2871 MPI_Comm comm; 2872 PetscMPIInt size; 2873 PetscScalar *cv, *sendbuf, *recvbuf; 2874 const PetscScalar *av,*bv; 2875 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2876 PetscScalar _DOne=1.0,_DZero=0.0; 2877 PetscBLASInt cm, cn, ck, alda, clda; 2878 2879 PetscFunctionBegin; 2880 MatCheckProduct(C,3); 2881 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product 
data empty"); 2882 abt = (Mat_MatTransMultDense*)C->product->data; 2883 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2884 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2885 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2886 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2887 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2888 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2889 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2890 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2891 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2892 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2893 /* copy transpose of B into buf[0] */ 2894 bn = B->rmap->n; 2895 sendbuf = abt->buf[0]; 2896 recvbuf = abt->buf[1]; 2897 for (k = 0, j = 0; j < bn; j++) { 2898 for (i = 0; i < cK; i++, k++) { 2899 sendbuf[k] = bv[i * blda + j]; 2900 } 2901 } 2902 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2903 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr); 2904 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2905 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2906 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2907 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2908 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2909 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2910 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2911 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2912 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2913 PetscFunctionReturn(0); 2914 } 2915 2916 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2917 { 2918 Mat_MatTransMultDense *abt; 2919 PetscErrorCode ierr; 2920 2921 PetscFunctionBegin; 2922 MatCheckProduct(C,3); 2923 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2924 abt = (Mat_MatTransMultDense*)C->product->data; 2925 switch (abt->alg) { 2926 case 1: 2927 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2928 break; 2929 default: 2930 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2931 break; 2932 } 2933 PetscFunctionReturn(0); 2934 } 2935 2936 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2937 { 2938 PetscErrorCode ierr; 2939 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2940 2941 PetscFunctionBegin; 2942 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2943 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2944 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2945 ierr = PetscFree(ab);CHKERRQ(ierr); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 #if defined(PETSC_HAVE_ELEMENTAL) 2950 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2951 { 2952 PetscErrorCode ierr; 2953 Mat_MatMultDense *ab; 2954 2955 PetscFunctionBegin; 2956 MatCheckProduct(C,3); 2957 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2958 ab = (Mat_MatMultDense*)C->product->data; 2959 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2960 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2961 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2962 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2967 { 2968 PetscErrorCode ierr; 2969 Mat Ae,Be,Ce; 2970 Mat_MatMultDense *ab; 2971 2972 PetscFunctionBegin; 2973 MatCheckProduct(C,4); 2974 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2975 /* check local size of A 
and B */ 2976 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2977 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2978 } 2979 2980 /* create elemental matrices Ae and Be */ 2981 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2982 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2983 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2984 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2985 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2986 2987 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2988 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2989 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2990 ierr = MatSetUp(Be);CHKERRQ(ierr); 2991 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2992 2993 /* compute symbolic Ce = Ae*Be */ 2994 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2995 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2996 2997 /* setup C */ 2998 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2999 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 3000 ierr = MatSetUp(C);CHKERRQ(ierr); 3001 3002 /* create data structure for reuse Cdense */ 3003 ierr = PetscNew(&ab);CHKERRQ(ierr); 3004 ab->Ae = Ae; 3005 ab->Be = Be; 3006 ab->Ce = Ce; 3007 3008 C->product->data = ab; 3009 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 3010 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 3011 PetscFunctionReturn(0); 3012 } 3013 #endif 3014 /* ----------------------------------------------- */ 3015 #if defined(PETSC_HAVE_ELEMENTAL) 3016 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 3017 { 3018 PetscFunctionBegin; 3019 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 3020 C->ops->productsymbolic = MatProductSymbolic_AB; 3021 PetscFunctionReturn(0); 3022 } 3023 #endif 3024 3025 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 3026 { 3027 Mat_Product *product = C->product; 3028 Mat A = product->A,B=product->B; 3029 3030 PetscFunctionBegin; 3031 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 3032 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 3033 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 3034 C->ops->productsymbolic = MatProductSymbolic_AtB; 3035 PetscFunctionReturn(0); 3036 } 3037 3038 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 3039 { 3040 PetscErrorCode ierr; 3041 Mat_Product *product = C->product; 3042 const char *algTypes[2] = {"allgatherv","cyclic"}; 3043 PetscInt alg,nalg = 2; 3044 PetscBool flg = PETSC_FALSE; 3045 3046 PetscFunctionBegin; 3047 /* Set default algorithm */ 3048 alg = 0; /* default is allgatherv */ 3049 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 3050 if (flg) { 3051 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 3052 } 3053 3054 /* Get runtime option */ 3055 if (product->api_user) { 3056 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 3057 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 3058 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3059 } else { 3060 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 3061 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic 
approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 3062 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3063 } 3064 if (flg) { 3065 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 3066 } 3067 3068 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 3069 C->ops->productsymbolic = MatProductSymbolic_ABt; 3070 PetscFunctionReturn(0); 3071 } 3072 3073 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 3074 { 3075 PetscErrorCode ierr; 3076 Mat_Product *product = C->product; 3077 3078 PetscFunctionBegin; 3079 switch (product->type) { 3080 #if defined(PETSC_HAVE_ELEMENTAL) 3081 case MATPRODUCT_AB: 3082 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 3083 break; 3084 #endif 3085 case MATPRODUCT_AtB: 3086 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 3087 break; 3088 case MATPRODUCT_ABt: 3089 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 3090 break; 3091 default: 3092 break; 3093 } 3094 PetscFunctionReturn(0); 3095 } 3096