1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else { 36 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); 37 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 47 PetscErrorCode ierr; 48 49 PetscFunctionBegin; 50 ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 57 PetscErrorCode ierr; 58 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 59 60 PetscFunctionBegin; 61 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 62 lrow = row - rstart; 63 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 68 { 69 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 70 PetscErrorCode ierr; 71 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 72 73 PetscFunctionBegin; 74 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 75 lrow = row - rstart; 76 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 81 { 82 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 83 PetscErrorCode ierr; 84 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 85 PetscScalar *array; 86 MPI_Comm comm; 87 PetscBool flg; 88 Mat B; 89 90 PetscFunctionBegin; 91 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 92 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 93 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 94 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 95 96 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 97 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 98 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 99 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 100 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 101 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 102 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 103 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 104 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 105 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 106 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 107 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 108 *a = B; 109 ierr = MatDestroy(&B);CHKERRQ(ierr); 110 } else *a = B; 111 PetscFunctionReturn(0); 112 } 113 114 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 115 { 116 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 117 PetscErrorCode ierr; 118 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 119 PetscBool roworiented = A->roworiented; 120 121 PetscFunctionBegin; 122 for (i=0; i<m; i++) { 123 if (idxm[i] < 0) continue; 124 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 125 if (idxm[i] >= rstart && idxm[i] < rend) { 126 row = idxm[i] - rstart; 127 if (roworiented) { 128 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 129 } else { 130 for (j=0; j<n; j++) { 131 if (idxn[j] < 0) continue; 132 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 133 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 134 } 135 } 136 } else if (!A->donotstash) { 137 mat->assembled = PETSC_FALSE; 138 if (roworiented) { 139 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 140 } else { 141 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 142 } 143 } 144 } 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 149 { 150 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 151 PetscErrorCode ierr; 152 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 153 154 PetscFunctionBegin; 155 for (i=0; i<m; i++) { 156 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 157 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 158 if (idxm[i] >= rstart && idxm[i] < rend) { 159 row = idxm[i] - rstart; 160 for (j=0; j<n; j++) { 161 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 162 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 163 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 164 } 165 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 166 } 167 PetscFunctionReturn(0); 168 } 169 170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 171 { 172 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 181 { 182 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 183 PetscBool iscuda; 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 if (!a->A) { 188 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 189 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 190 ierr = 
PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 191 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 192 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 193 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 194 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 195 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 196 } 197 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 219 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 230 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 235 { 236 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 if (a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 241 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 242 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 247 { 248 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 249 PetscErrorCode ierr; 250 251 PetscFunctionBegin; 252 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 253 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 254 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 259 { 260 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 265 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 266 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 271 { 272 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 273 PetscErrorCode ierr; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 284 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 285 if (comm_mat != comm_is) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 288 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 289 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 290 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 291 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 292 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! */ 297 298 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! 
*/ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 309 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 310 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 311 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 317 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 318 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 326 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 332 /* Free work space */ 333 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 334 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 335 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 360 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 371 { 372 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 373 PetscErrorCode ierr; 374 PetscInt nstash,reallocs; 375 376 PetscFunctionBegin; 377 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 378 379 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 380 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 381 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 386 { 387 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 388 PetscErrorCode ierr; 389 PetscInt i,*row,*col,flg,j,rstart,ncols; 390 PetscMPIInt n; 391 PetscScalar *val; 392 393 PetscFunctionBegin; 394 if (!mdn->donotstash && !mat->nooffprocentries) { 395 /* wait on receives */ 396 while (1) { 397 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 398 if (!flg) break; 399 400 for (i=0; i<n;) { 401 /* Now identify the consecutive vals belonging to the same row */ 402 for (j=i,rstart=row[j]; j<n; j++) { 403 if (row[j] != rstart) break; 404 } 405 if (j < n) ncols = j-i; 406 else ncols = n-i; 407 /* Now assemble all these values with a single function call */ 408 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 409 i = j; 410 } 411 } 412 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 413 } 414 415 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 417 418 if (!mat->was_assembled && mode == 
MAT_FINAL_ASSEMBLY) { 419 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 425 { 426 PetscErrorCode ierr; 427 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 428 429 PetscFunctionBegin; 430 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 437 PetscErrorCode ierr; 438 PetscInt i,len,*lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 443 /* fix right hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 449 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 450 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 451 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 452 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 453 } 454 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 455 if (diag != 0.0) { 456 Vec d; 457 458 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 459 ierr = VecSet(d,diag);CHKERRQ(ierr); 460 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecDestroy(&d);CHKERRQ(ierr); 462 } 463 ierr = PetscFree(lrows);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 const PetscScalar *ax; 477 PetscScalar *ay; 478 PetscMemType axmtype,aymtype; 479 
480 PetscFunctionBegin; 481 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 482 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 483 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr); 484 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 485 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 486 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 487 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 494 PetscErrorCode ierr; 495 const PetscScalar *ax; 496 PetscScalar *ay; 497 PetscMemType axmtype,aymtype; 498 499 PetscFunctionBegin; 500 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 501 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 502 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr); 503 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 504 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 505 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 506 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 513 PetscErrorCode ierr; 514 const PetscScalar *ax; 515 PetscScalar *ay; 516 PetscMemType axmtype,aymtype; 517 518 PetscFunctionBegin; 519 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 520 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 522 ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr); 523 ierr = 
PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 524 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 525 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 526 ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 PetscErrorCode ierr; 534 const PetscScalar *ax; 535 PetscScalar *ay; 536 PetscMemType axmtype,aymtype; 537 538 PetscFunctionBegin; 539 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 540 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 541 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 542 ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr); 543 ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 544 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 545 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 546 ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr); 547 PetscFunctionReturn(0); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 553 PetscErrorCode ierr; 554 PetscInt lda,len,i,n,m = A->rmap->n,radd; 555 PetscScalar *x,zero = 0.0; 556 const PetscScalar *av; 557 558 PetscFunctionBegin; 559 ierr = VecSet(v,zero);CHKERRQ(ierr); 560 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 561 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 562 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 563 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 564 radd = A->rmap->rstart*m; 565 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 566 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 567 for (i=0; i<len; i++) { 568 x[i] = av[radd + i*lda + i]; 569 } 570 ierr = 
MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 571 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatDestroy_MPIDense(Mat mat) 576 { 577 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 578 PetscErrorCode ierr; 579 580 PetscFunctionBegin; 581 #if defined(PETSC_USE_LOG) 582 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 583 #endif 584 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 585 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 586 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 587 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 588 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 589 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 590 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 591 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 592 593 ierr = PetscFree(mat->data);CHKERRQ(ierr); 594 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 595 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 604 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 607 #if defined(PETSC_HAVE_ELEMENTAL) 608 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 609 #endif 610 #if defined(PETSC_HAVE_SCALAPACK) 611 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 612 #endif 613 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 614 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 615 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 616 #if defined (PETSC_HAVE_CUDA) 617 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 618 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 619 #endif 620 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 621 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 622 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 623 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 624 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 625 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 626 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 627 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 628 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 634 635 #include <petscdraw.h> 636 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 637 { 638 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 639 PetscErrorCode ierr; 640 PetscMPIInt rank; 641 PetscViewerType vtype; 642 PetscBool iascii,isdraw; 643 PetscViewer sviewer; 644 PetscViewerFormat format; 645 646 PetscFunctionBegin; 647 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 648 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 649 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 650 if (iascii) { 651 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 652 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 653 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 654 MatInfo info; 655 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 656 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 657 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 658 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 660 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } else if (format == PETSC_VIEWER_ASCII_INFO) { 663 PetscFunctionReturn(0); 664 } 665 } else if (isdraw) { 666 PetscDraw draw; 667 PetscBool isnull; 668 669 ierr = 
PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 670 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 671 if (isnull) PetscFunctionReturn(0); 672 } 673 674 { 675 /* assemble the entire matrix onto first processor. */ 676 Mat A; 677 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 678 PetscInt *cols; 679 PetscScalar *vals; 680 681 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 682 if (!rank) { 683 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 684 } else { 685 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 686 } 687 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 688 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 689 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 690 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 691 692 /* Copy the matrix ... This isn't the most efficient means, 693 but it's quick for now */ 694 A->insertmode = INSERT_VALUES; 695 696 row = mat->rmap->rstart; 697 m = mdn->A->rmap->n; 698 for (i=0; i<m; i++) { 699 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 700 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 702 row++; 703 } 704 705 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 707 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 708 if (!rank) { 709 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 710 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 711 } 712 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 713 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 714 ierr = MatDestroy(&A);CHKERRQ(ierr); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 
/* Viewer dispatch for MPIDENSE: ASCII/socket/draw go through the gathered
   ASCIIorDraworSocket path, binary goes through the dense binary writer.
   Unrecognized viewer types are silently ignored. */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Gather matrix statistics: queries the local (sequential) block, then either
   reports the local numbers (MAT_LOCAL) or reduces them across the communicator
   with MPI_MAX (MAT_GLOBAL_MAX) or MPI_SUM (MAT_GLOBAL_SUM). */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  /* fill info with the local block's numbers, then stage them for reduction */
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Option handling: options that affect the storage are forwarded to the local
   block; options with no meaning for a dense matrix are logged and ignored. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg; /* remembered for MatSetValues insertion order */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* skip the stash for off-process MatSetValues */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* Diagonal scaling A <- diag(ll) * A * diag(rr). The left vector matches the row
   layout and scales local rows in place; the right vector is scattered into
   mdn->lvec via the PetscSF Mvctx so every rank holds all N column scales. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i; /* row i: stride lda across the column-major storage */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    /* NOTE(review): this message uses %d for PetscInt arguments while the left-vector
       check above uses %D — looks inconsistent; confirm and unify on %D */
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    /* gather the full right-scaling vector onto every rank */
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda; /* column i is contiguous */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Matrix norms. On one rank everything is delegated to the sequential block.
   In parallel: Frobenius sums |a_ij|^2 locally then Allreduce+sqrt; 1-norm
   reduces per-column absolute sums and takes the max; infinity-norm takes the
   max over per-rank row norms. The 2-norm is not supported in parallel. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): walks cmap->n*rmap->n entries linearly — assumes the local
         array is packed (lda == local rows); confirm for submatrix views */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      /* local per-column absolute sums (every rank owns all N columns) */
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Transpose via MatSetValues: each local column j of A becomes (global) row j of B,
   inserted against the owned global row indices, then B is assembled. For
   MAT_INPLACE_MATRIX the result replaces A through MatHeaderMerge(). */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); /* layouts swapped */
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i; /* global row indices owned here */
  for (j=0; j<n; j++) {
    /* column j of A (contiguous, length m) becomes row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* forward declarations for routines defined later in this file */
static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* Finalize layouts and perform default preallocation if none was done. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y <- Y + alpha*X, delegated entry-wise to the local blocks. */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Complex-conjugate all entries of the local block. */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keep only the real part of every entry (delegates to the local block). */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keep only the imaginary part of every entry (delegates to the local block). */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract global column `col` into v; the local block fills the owned rows. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

/* Per-column norms: compute local column norms, square them for NORM_2 so the
   SUM reduction adds squares, reduce with MAX (infinity) or SUM (1/2-norm),
   and take square roots afterwards for NORM_2. */
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i]; /* sum of squares across ranks */
  }
  /* NOTE(review): uses A->hdr.comm directly while the rest of this file goes
     through PetscObjectComm((PetscObject)A) — confirm and make consistent */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* CUDA column-vector access: wrap column `col` of the device array in the cached
   MPI CUDA vector a->cvec (created lazily, backed by no storage of its own).
   vecinuse records col+1 so the matching Restore can be enforced. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec (read-write). */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: additionally pushes a read lock on the column vector. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the read-only column vector; pops the read lock before resetting. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: uses the write accessor so no device-to-host sync occurs. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the write-only column vector. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* The MatDenseCUDA{Place,Reset,Replace}Array and array accessors below simply
   forward to the sequential local block; Place/Reset/Replace first verify no
   column vector or submatrix view is outstanding. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1237 1238 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1239 { 1240 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1249 { 1250 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1251 PetscErrorCode ierr; 1252 1253 PetscFunctionBegin; 1254 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1259 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1260 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1261 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1262 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1263 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1264 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1265 1266 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1267 { 1268 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1269 PetscErrorCode ierr; 1270 1271 PetscFunctionBegin; 1272 if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1273 if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1274 if (d->A) { 1275 ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1276 } 1277 mat->boundtocpu = bind; 1278 if (!bind) { 1279 PetscBool iscuda; 1280 1281 ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1282 if (!iscuda) { 1283 ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1284 } 1285 ierr = 
PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1286 if (!iscuda) { 1287 ierr = MatDestroy(&d->cmat);CHKERRQ(ierr); 1288 } 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1295 } else { 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1302 } 1303 if (d->cmat) { 1304 ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 PetscErrorCode 
MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1310 { 1311 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1312 PetscErrorCode ierr; 1313 PetscBool iscuda; 1314 1315 PetscFunctionBegin; 1316 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1317 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1318 if (!iscuda) PetscFunctionReturn(0); 1319 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1320 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1321 if (!d->A) { 1322 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1323 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1324 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1325 } 1326 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1327 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1328 A->preallocated = PETSC_TRUE; 1329 PetscFunctionReturn(0); 1330 } 1331 #endif 1332 1333 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1334 { 1335 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1344 { 1345 PetscFunctionBegin; 1346 *missing = PETSC_FALSE; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1351 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1352 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1353 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1354 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1355 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1356 1357 /* 
-------------------------------------------------------------------*/ 1358 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1359 MatGetRow_MPIDense, 1360 MatRestoreRow_MPIDense, 1361 MatMult_MPIDense, 1362 /* 4*/ MatMultAdd_MPIDense, 1363 MatMultTranspose_MPIDense, 1364 MatMultTransposeAdd_MPIDense, 1365 NULL, 1366 NULL, 1367 NULL, 1368 /* 10*/ NULL, 1369 NULL, 1370 NULL, 1371 NULL, 1372 MatTranspose_MPIDense, 1373 /* 15*/ MatGetInfo_MPIDense, 1374 MatEqual_MPIDense, 1375 MatGetDiagonal_MPIDense, 1376 MatDiagonalScale_MPIDense, 1377 MatNorm_MPIDense, 1378 /* 20*/ MatAssemblyBegin_MPIDense, 1379 MatAssemblyEnd_MPIDense, 1380 MatSetOption_MPIDense, 1381 MatZeroEntries_MPIDense, 1382 /* 24*/ MatZeroRows_MPIDense, 1383 NULL, 1384 NULL, 1385 NULL, 1386 NULL, 1387 /* 29*/ MatSetUp_MPIDense, 1388 NULL, 1389 NULL, 1390 MatGetDiagonalBlock_MPIDense, 1391 NULL, 1392 /* 34*/ MatDuplicate_MPIDense, 1393 NULL, 1394 NULL, 1395 NULL, 1396 NULL, 1397 /* 39*/ MatAXPY_MPIDense, 1398 MatCreateSubMatrices_MPIDense, 1399 NULL, 1400 MatGetValues_MPIDense, 1401 MatCopy_MPIDense, 1402 /* 44*/ NULL, 1403 MatScale_MPIDense, 1404 MatShift_Basic, 1405 NULL, 1406 NULL, 1407 /* 49*/ MatSetRandom_MPIDense, 1408 NULL, 1409 NULL, 1410 NULL, 1411 NULL, 1412 /* 54*/ NULL, 1413 NULL, 1414 NULL, 1415 NULL, 1416 NULL, 1417 /* 59*/ MatCreateSubMatrix_MPIDense, 1418 MatDestroy_MPIDense, 1419 MatView_MPIDense, 1420 NULL, 1421 NULL, 1422 /* 64*/ NULL, 1423 NULL, 1424 NULL, 1425 NULL, 1426 NULL, 1427 /* 69*/ NULL, 1428 NULL, 1429 NULL, 1430 NULL, 1431 NULL, 1432 /* 74*/ NULL, 1433 NULL, 1434 NULL, 1435 NULL, 1436 NULL, 1437 /* 79*/ NULL, 1438 NULL, 1439 NULL, 1440 NULL, 1441 /* 83*/ MatLoad_MPIDense, 1442 NULL, 1443 NULL, 1444 NULL, 1445 NULL, 1446 NULL, 1447 /* 89*/ NULL, 1448 NULL, 1449 NULL, 1450 NULL, 1451 NULL, 1452 /* 94*/ NULL, 1453 NULL, 1454 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1455 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1456 NULL, 1457 /* 99*/ 
MatProductSetFromOptions_MPIDense, 1458 NULL, 1459 NULL, 1460 MatConjugate_MPIDense, 1461 NULL, 1462 /*104*/ NULL, 1463 MatRealPart_MPIDense, 1464 MatImaginaryPart_MPIDense, 1465 NULL, 1466 NULL, 1467 /*109*/ NULL, 1468 NULL, 1469 NULL, 1470 MatGetColumnVector_MPIDense, 1471 MatMissingDiagonal_MPIDense, 1472 /*114*/ NULL, 1473 NULL, 1474 NULL, 1475 NULL, 1476 NULL, 1477 /*119*/ NULL, 1478 NULL, 1479 NULL, 1480 NULL, 1481 NULL, 1482 /*124*/ NULL, 1483 MatGetColumnNorms_MPIDense, 1484 NULL, 1485 NULL, 1486 NULL, 1487 /*129*/ NULL, 1488 NULL, 1489 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1490 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1491 NULL, 1492 /*134*/ NULL, 1493 NULL, 1494 NULL, 1495 NULL, 1496 NULL, 1497 /*139*/ NULL, 1498 NULL, 1499 NULL, 1500 NULL, 1501 NULL, 1502 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1503 /*145*/ NULL, 1504 NULL, 1505 NULL 1506 }; 1507 1508 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1509 { 1510 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1511 PetscBool iscuda; 1512 PetscErrorCode ierr; 1513 1514 PetscFunctionBegin; 1515 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1516 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1517 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1518 if (!a->A) { 1519 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1520 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1521 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1522 } 1523 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1524 ierr = MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1525 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1526 mat->preallocated = PETSC_TRUE; 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #if defined(PETSC_HAVE_ELEMENTAL) 1531 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1532 { 1533 Mat mat_elemental; 1534 PetscErrorCode ierr; 1535 PetscScalar *v; 1536 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1537 1538 PetscFunctionBegin; 1539 if (reuse == MAT_REUSE_MATRIX) { 1540 mat_elemental = *newmat; 1541 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1542 } else { 1543 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1544 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1545 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1546 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1547 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1548 } 1549 1550 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1551 for (i=0; i<N; i++) cols[i] = i; 1552 for (i=0; i<m; i++) rows[i] = rstart + i; 1553 1554 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1555 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1556 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1557 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1558 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1559 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1560 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1561 1562 if (reuse == MAT_INPLACE_MATRIX) { 1563 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1564 } else { 1565 *newmat = mat_elemental; 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 #endif 1570 1571 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt 
col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* column access is delegated to the local sequential block */
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release column access previously obtained via MatDenseGetColumn_MPIDense(); forwards to the local sequential block */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Create (or refill, for MAT_REUSE_MATRIX) a parallel dense matrix whose local block is a copy of the
   given sequential matrix; n is the number of local columns (PETSC_DECIDE lets PETSc split N) */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix to plain MATMPIDENSE: rebind to the CPU, reset the default
   vector type, strip the CUDA-specific composed methods, and convert the local block to MATSEQDENSE */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX falls through: B aliases M and is converted in place */
  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-only composed methods installed by MatConvert_MPIDense_MPIDenseCUDA() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA: install the CUDA array accessors and product
   hooks, convert the local block to MATSEQDENSECUDA, and unbind from the CPU */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Expose column col of the parallel dense matrix as an MPI Vec that shares the local storage;
   must be matched by MatDenseRestoreColumnVec_MPIDense() */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* the column vector is created lazily and reused for subsequent calls */
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* point the vector at the start of column col (lda accounts for any leading-dimension padding) */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Return the column vector obtained with MatDenseGetColumnVec_MPIDense() and release the array access */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); the returned vector is read-locked */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Return the read-only column vector obtained with MatDenseGetColumnVecRead_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense(); existing values need not be valid */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Return the write-only column vector obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Expose columns [cbegin,cend) as a parallel dense matrix sharing the storage of A;
   must be matched by MatDenseRestoreSubMatrix_MPIDense() */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* lazily create the reusable wrapper matrix; rows are shared with A, columns are [cbegin,cend) */
    setup = PETSC_TRUE;
    ierr  = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr  = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr  = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr  = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr  = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr  = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* wrapper exists but the requested column count changed: rebuild its column layout */
    setup = PETSC_TRUE;
    ierr  = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr  = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr  = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr  = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if 
(setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse = cbegin + 1;
  *v          = a->cmat;
  PetscFunctionReturn(0);
}

/* Return the submatrix obtained with MatDenseGetSubMatrix_MPIDense() and release the shared storage */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Constructor for the MATMPIDENSE type: installs the function table and composed methods */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: a plain MPI dense matrix converted in place to the CUDA type */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
.  B - the matrix
-  data - optional location of matrix data. 
Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* PetscTryMethod: silently a no-op if B is not of a type that composes this method */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array lives on the device, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* GPU values were just written, so the device copy is the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix.
The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2266 2267 Not Collective 2268 2269 Input Parameters: 2270 . A - the matrix 2271 2272 Output Parameters 2273 . array - the GPU array in column major order 2274 2275 Notes: 2276 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2277 2278 Level: developer 2279 2280 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2281 @*/ 2282 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2283 { 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2288 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 /*@C 2293 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2294 2295 Not Collective 2296 2297 Input Parameters: 2298 + A - the matrix 2299 - array - the GPU array in column major order 2300 2301 Notes: 2302 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2303 2304 Level: developer 2305 2306 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2307 @*/ 2308 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2309 { 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 /*@C 2318 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the device copy may have been modified through the returned pointer */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* some ranks may pass data while others pass NULL; agree collectively */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* data must be passed (or not) consistently across all ranks */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicate a parallel dense matrix: same layouts and type, local block duplicated per cpvalues */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2506 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2507 2508 *newmat = mat; 2509 PetscFunctionReturn(0); 2510 } 2511 2512 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2513 { 2514 PetscErrorCode ierr; 2515 PetscBool isbinary; 2516 #if defined(PETSC_HAVE_HDF5) 2517 PetscBool ishdf5; 2518 #endif 2519 2520 PetscFunctionBegin; 2521 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2522 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2523 /* force binary viewer to load .info file if it has not yet done so */ 2524 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2525 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2526 #if defined(PETSC_HAVE_HDF5) 2527 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2528 #endif 2529 if (isbinary) { 2530 ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2531 #if defined(PETSC_HAVE_HDF5) 2532 } else if (ishdf5) { 2533 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2534 #endif 2535 } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2540 { 2541 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2542 Mat a,b; 2543 PetscBool flg; 2544 PetscErrorCode ierr; 2545 2546 PetscFunctionBegin; 2547 a = matA->A; 2548 b = matB->A; 2549 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2550 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2555 { 2556 PetscErrorCode ierr; 2557 Mat_TransMatMultDense *atb = 
(Mat_TransMatMultDense *)data; 2558 2559 PetscFunctionBegin; 2560 ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2561 ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2562 ierr = PetscFree(atb);CHKERRQ(ierr); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2567 { 2568 PetscErrorCode ierr; 2569 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2570 2571 PetscFunctionBegin; 2572 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2573 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2574 ierr = PetscFree(abt);CHKERRQ(ierr); 2575 PetscFunctionReturn(0); 2576 } 2577 2578 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2579 { 2580 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2581 Mat_TransMatMultDense *atb; 2582 PetscErrorCode ierr; 2583 MPI_Comm comm; 2584 PetscMPIInt size,*recvcounts; 2585 PetscScalar *carray,*sendbuf; 2586 const PetscScalar *atbarray; 2587 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2588 const PetscInt *ranges; 2589 2590 PetscFunctionBegin; 2591 MatCheckProduct(C,3); 2592 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2593 atb = (Mat_TransMatMultDense *)C->product->data; 2594 recvcounts = atb->recvcounts; 2595 sendbuf = atb->sendbuf; 2596 2597 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2598 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2599 2600 /* compute atbarray = aseq^T * bseq */ 2601 ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? 
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2602 2603 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2604 2605 /* arrange atbarray into sendbuf */ 2606 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2607 for (proc=0, k=0; proc<size; proc++) { 2608 for (j=0; j<cN; j++) { 2609 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2610 } 2611 } 2612 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2613 2614 /* sum all atbarray to local values of C */ 2615 ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 2616 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr); 2617 ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 2618 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2619 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2624 { 2625 PetscErrorCode ierr; 2626 MPI_Comm comm; 2627 PetscMPIInt size; 2628 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2629 Mat_TransMatMultDense *atb; 2630 PetscBool cisdense; 2631 PetscInt i; 2632 const PetscInt *ranges; 2633 2634 PetscFunctionBegin; 2635 MatCheckProduct(C,3); 2636 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2637 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2638 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2639 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2640 } 2641 2642 /* create matrix product C */ 2643 ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2644 ierr = 
PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 2645 if (!cisdense) { 2646 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2647 } 2648 ierr = MatSetUp(C);CHKERRQ(ierr); 2649 2650 /* create data structure for reuse C */ 2651 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2652 ierr = PetscNew(&atb);CHKERRQ(ierr); 2653 cM = C->rmap->N; 2654 ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2655 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2656 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2657 2658 C->product->data = atb; 2659 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2660 PetscFunctionReturn(0); 2661 } 2662 2663 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2664 { 2665 PetscErrorCode ierr; 2666 MPI_Comm comm; 2667 PetscMPIInt i, size; 2668 PetscInt maxRows, bufsiz; 2669 PetscMPIInt tag; 2670 PetscInt alg; 2671 Mat_MatTransMultDense *abt; 2672 Mat_Product *product = C->product; 2673 PetscBool flg; 2674 2675 PetscFunctionBegin; 2676 MatCheckProduct(C,4); 2677 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2678 /* check local size of A and B */ 2679 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2680 2681 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2682 alg = flg ? 
0 : 1; 2683 2684 /* setup matrix product C */ 2685 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2686 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2687 ierr = MatSetUp(C);CHKERRQ(ierr); 2688 ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 2689 2690 /* create data structure for reuse C */ 2691 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2692 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2693 ierr = PetscNew(&abt);CHKERRQ(ierr); 2694 abt->tag = tag; 2695 abt->alg = alg; 2696 switch (alg) { 2697 case 1: /* alg: "cyclic" */ 2698 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2699 bufsiz = A->cmap->N * maxRows; 2700 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2701 break; 2702 default: /* alg: "allgatherv" */ 2703 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2704 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2705 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2706 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2707 break; 2708 } 2709 2710 C->product->data = abt; 2711 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2712 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2713 PetscFunctionReturn(0); 2714 } 2715 2716 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2717 { 2718 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2719 Mat_MatTransMultDense *abt; 2720 PetscErrorCode ierr; 2721 MPI_Comm comm; 2722 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2723 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2724 PetscInt i,cK=A->cmap->N,k,j,bn; 2725 
PetscScalar _DOne=1.0,_DZero=0.0; 2726 const PetscScalar *av,*bv; 2727 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2728 MPI_Request reqs[2]; 2729 const PetscInt *ranges; 2730 2731 PetscFunctionBegin; 2732 MatCheckProduct(C,3); 2733 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2734 abt = (Mat_MatTransMultDense*)C->product->data; 2735 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2736 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 2737 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2738 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2739 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2740 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2741 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2742 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2743 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2744 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2745 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2747 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2748 bn = B->rmap->n; 2749 if (blda == bn) { 2750 sendbuf = (PetscScalar*)bv; 2751 } else { 2752 sendbuf = abt->buf[0]; 2753 for (k = 0, i = 0; i < cK; i++) { 2754 for (j = 0; j < bn; j++, k++) { 2755 sendbuf[k] = bv[i * blda + j]; 2756 } 2757 } 2758 } 2759 if (size > 1) { 2760 sendto = (rank + size - 1) % size; 2761 recvfrom = (rank + size + 1) % size; 2762 } else { 2763 sendto = recvfrom = 0; 2764 } 2765 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2766 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2767 recvisfrom = rank; 2768 for (i = 0; i < size; i++) { 2769 /* we have finished receiving in sending, bufs can be read/modified */ 2770 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2771 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2772 2773 if (nextrecvisfrom != rank) { 2774 
/* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2775 sendsiz = cK * bn; 2776 recvsiz = cK * nextbn; 2777 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2778 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr); 2779 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr); 2780 } 2781 2782 /* local aseq * sendbuf^T */ 2783 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2784 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2785 2786 if (nextrecvisfrom != rank) { 2787 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2788 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 2789 } 2790 bn = nextbn; 2791 recvisfrom = nextrecvisfrom; 2792 sendbuf = recvbuf; 2793 } 2794 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2795 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2796 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2797 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2798 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2799 PetscFunctionReturn(0); 2800 } 2801 2802 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2803 { 2804 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2805 Mat_MatTransMultDense *abt; 2806 PetscErrorCode ierr; 2807 MPI_Comm comm; 2808 PetscMPIInt size; 2809 PetscScalar *cv, *sendbuf, *recvbuf; 2810 const PetscScalar *av,*bv; 2811 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2812 PetscScalar _DOne=1.0,_DZero=0.0; 2813 PetscBLASInt cm, cn, ck, alda, clda; 2814 2815 PetscFunctionBegin; 2816 MatCheckProduct(C,3); 2817 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product 
data empty"); 2818 abt = (Mat_MatTransMultDense*)C->product->data; 2819 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2820 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2821 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2822 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2823 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2824 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2825 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2826 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2827 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2828 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2829 /* copy transpose of B into buf[0] */ 2830 bn = B->rmap->n; 2831 sendbuf = abt->buf[0]; 2832 recvbuf = abt->buf[1]; 2833 for (k = 0, j = 0; j < bn; j++) { 2834 for (i = 0; i < cK; i++, k++) { 2835 sendbuf[k] = bv[i * blda + j]; 2836 } 2837 } 2838 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2839 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr); 2840 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2841 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2842 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2843 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2844 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2845 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2846 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2847 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2848 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2849 PetscFunctionReturn(0); 2850 } 2851 2852 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2853 { 2854 Mat_MatTransMultDense *abt; 2855 PetscErrorCode ierr; 2856 2857 PetscFunctionBegin; 2858 MatCheckProduct(C,3); 2859 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2860 abt = (Mat_MatTransMultDense*)C->product->data; 2861 switch (abt->alg) { 2862 case 1: 2863 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2864 break; 2865 default: 2866 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2867 break; 2868 } 2869 PetscFunctionReturn(0); 2870 } 2871 2872 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2873 { 2874 PetscErrorCode ierr; 2875 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2876 2877 PetscFunctionBegin; 2878 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2879 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2880 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2881 ierr = PetscFree(ab);CHKERRQ(ierr); 2882 PetscFunctionReturn(0); 2883 } 2884 2885 #if defined(PETSC_HAVE_ELEMENTAL) 2886 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2887 { 2888 PetscErrorCode ierr; 2889 Mat_MatMultDense *ab; 2890 2891 PetscFunctionBegin; 2892 MatCheckProduct(C,3); 2893 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2894 ab = (Mat_MatMultDense*)C->product->data; 2895 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2896 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2897 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2898 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2899 PetscFunctionReturn(0); 2900 } 2901 2902 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2903 { 2904 PetscErrorCode ierr; 2905 Mat Ae,Be,Ce; 2906 Mat_MatMultDense *ab; 2907 2908 PetscFunctionBegin; 2909 MatCheckProduct(C,4); 2910 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2911 /* check local size of A 
and B */ 2912 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2913 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2914 } 2915 2916 /* create elemental matrices Ae and Be */ 2917 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2918 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2919 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2920 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2921 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2922 2923 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2924 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2925 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2926 ierr = MatSetUp(Be);CHKERRQ(ierr); 2927 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2928 2929 /* compute symbolic Ce = Ae*Be */ 2930 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2931 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2932 2933 /* setup C */ 2934 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2935 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2936 ierr = MatSetUp(C);CHKERRQ(ierr); 2937 2938 /* create data structure for reuse Cdense */ 2939 ierr = PetscNew(&ab);CHKERRQ(ierr); 2940 ab->Ae = Ae; 2941 ab->Be = Be; 2942 ab->Ce = Ce; 2943 2944 C->product->data = ab; 2945 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2946 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2947 PetscFunctionReturn(0); 2948 } 2949 #endif 2950 /* ----------------------------------------------- */ 2951 #if defined(PETSC_HAVE_ELEMENTAL) 2952 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2953 { 2954 PetscFunctionBegin; 2955 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2956 C->ops->productsymbolic = MatProductSymbolic_AB; 2957 PetscFunctionReturn(0); 2958 } 2959 #endif 2960 2961 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2962 { 2963 Mat_Product *product = C->product; 2964 Mat A = product->A,B=product->B; 2965 2966 PetscFunctionBegin; 2967 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2968 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2969 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2970 C->ops->productsymbolic = MatProductSymbolic_AtB; 2971 PetscFunctionReturn(0); 2972 } 2973 2974 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2975 { 2976 PetscErrorCode ierr; 2977 Mat_Product *product = C->product; 2978 const char *algTypes[2] = {"allgatherv","cyclic"}; 2979 PetscInt alg,nalg = 2; 2980 PetscBool flg = PETSC_FALSE; 2981 2982 PetscFunctionBegin; 2983 /* Set default algorithm */ 2984 alg = 0; /* default is allgatherv */ 2985 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2986 if (flg) { 2987 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2988 } 2989 2990 /* Get runtime option */ 2991 if (product->api_user) { 2992 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2993 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2994 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2995 } else { 2996 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2997 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic 
approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2998 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2999 } 3000 if (flg) { 3001 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 3002 } 3003 3004 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 3005 C->ops->productsymbolic = MatProductSymbolic_ABt; 3006 PetscFunctionReturn(0); 3007 } 3008 3009 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 3010 { 3011 PetscErrorCode ierr; 3012 Mat_Product *product = C->product; 3013 3014 PetscFunctionBegin; 3015 switch (product->type) { 3016 #if defined(PETSC_HAVE_ELEMENTAL) 3017 case MATPRODUCT_AB: 3018 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 3019 break; 3020 #endif 3021 case MATPRODUCT_AtB: 3022 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 3023 break; 3024 case MATPRODUCT_ABt: 3025 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 3026 break; 3027 default: 3028 break; 3029 } 3030 PetscFunctionReturn(0); 3031 } 3032