1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else { 36 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); 37 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 48 49 PetscFunctionBegin; 50 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 51 lrow = row - rstart; 52 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 57 { 58 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 59 PetscErrorCode ierr; 60 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 61 62 PetscFunctionBegin; 63 if (row < rstart || row >= rend) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 64 lrow = row - rstart; 65 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 70 { 71 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 72 PetscErrorCode ierr; 73 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 74 PetscScalar *array; 75 MPI_Comm comm; 76 PetscBool flg; 77 Mat B; 78 79 PetscFunctionBegin; 80 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 81 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 82 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 83 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 84 85 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 86 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 87 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 88 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 89 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 90 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 91 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 92 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 93 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 97 *a = B; 98 ierr = MatDestroy(&B);CHKERRQ(ierr); 99 } else *a = B; 100 PetscFunctionReturn(0); 101 } 102 103 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 104 { 105 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 106 PetscErrorCode ierr; 107 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 108 PetscBool roworiented = A->roworiented; 109 110 PetscFunctionBegin; 111 for (i=0; i<m; i++) { 112 if (idxm[i] < 0) continue; 113 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 114 if (idxm[i] >= rstart && idxm[i] < rend) { 115 row = idxm[i] - rstart; 116 if (roworiented) { 117 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 118 } else { 119 for (j=0; j<n; j++) { 120 if (idxn[j] < 0) continue; 121 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 122 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 123 } 124 } 125 } else if (!A->donotstash) { 126 mat->assembled = PETSC_FALSE; 127 if (roworiented) { 128 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 129 } else { 130 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 131 } 132 } 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 138 { 139 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 140 PetscErrorCode ierr; 141 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 142 143 PetscFunctionBegin; 144 for (i=0; i<m; i++) { 145 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 146 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 147 if (idxm[i] >= rstart && idxm[i] < rend) { 148 row = idxm[i] - rstart; 149 for (j=0; j<n; j++) { 150 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 151 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 152 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 153 } 154 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 160 { 161 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 162 PetscErrorCode ierr; 163 164 PetscFunctionBegin; 165 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 170 { 171 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 172 PetscBool iscuda; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 if (!a->A) { 177 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 178 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 179 ierr = 
PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 180 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 181 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 182 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 183 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 184 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 185 } 186 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 191 { 192 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 193 PetscErrorCode ierr; 194 195 PetscFunctionBegin; 196 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 197 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 219 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 if (a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 230 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 231 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 236 { 237 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 238 PetscErrorCode ierr; 239 240 PetscFunctionBegin; 241 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 242 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 243 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 248 { 249 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 250 PetscErrorCode ierr; 251 252 PetscFunctionBegin; 253 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 254 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 255 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 260 { 261 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 262 PetscErrorCode ierr; 263 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 264 const PetscInt *irow,*icol; 265 const PetscScalar *v; 266 PetscScalar *bv; 267 Mat newmat; 268 IS iscol_local; 269 MPI_Comm comm_is,comm_mat; 270 271 PetscFunctionBegin; 272 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 273 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 274 if (comm_mat != comm_is) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 275 276 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 278 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 279 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 280 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 281 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 282 283 /* No parallel redistribution currently supported! Should really check each index set 284 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 285 original matrix! */ 286 287 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 288 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 289 290 /* Check submatrix call */ 291 if (scall == MAT_REUSE_MATRIX) { 292 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 293 /* Really need to test rows and column sizes! 
*/ 294 newmat = *B; 295 } else { 296 /* Create and fill new matrix */ 297 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 298 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 299 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 300 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 301 } 302 303 /* Now extract the data pointers and do the copy, column at a time */ 304 newmatd = (Mat_MPIDense*)newmat->data; 305 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 306 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 307 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 308 for (i=0; i<Ncols; i++) { 309 const PetscScalar *av = v + lda*icol[i]; 310 for (j=0; j<nrows; j++) { 311 *bv++ = av[irow[j] - rstart]; 312 } 313 } 314 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 315 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 316 317 /* Assemble the matrices so that the correct flags are set */ 318 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 319 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 321 /* Free work space */ 322 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 323 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 324 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 325 *B = newmat; 326 PetscFunctionReturn(0); 327 } 328 329 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 330 { 331 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 332 PetscErrorCode ierr; 333 334 PetscFunctionBegin; 335 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 340 { 341 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 342 PetscErrorCode ierr; 343 344 PetscFunctionBegin; 345 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 346 PetscFunctionReturn(0); 347 } 348 349 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 350 { 351 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 360 { 361 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt nstash,reallocs; 364 365 PetscFunctionBegin; 366 if (mdn->donotstash || mat->nooffprocentries) return(0); 367 368 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 369 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 370 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 371 PetscFunctionReturn(0); 372 } 373 374 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 375 { 376 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 377 PetscErrorCode ierr; 378 PetscInt i,*row,*col,flg,j,rstart,ncols; 379 PetscMPIInt n; 380 PetscScalar *val; 381 382 PetscFunctionBegin; 383 if (!mdn->donotstash && !mat->nooffprocentries) { 384 /* wait on receives */ 385 while (1) { 386 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 387 if (!flg) break; 388 389 for (i=0; i<n;) { 390 /* Now identify the consecutive vals belonging to the same row */ 391 for (j=i,rstart=row[j]; j<n; j++) { 392 if (row[j] != rstart) break; 393 } 394 if (j < n) ncols = j-i; 395 else ncols = n-i; 396 /* Now assemble all these values with a single function call */ 397 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 398 i = j; 399 } 400 } 401 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 402 } 403 404 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 405 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 406 407 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 
408 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 409 } 410 PetscFunctionReturn(0); 411 } 412 413 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 414 { 415 PetscErrorCode ierr; 416 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 417 418 PetscFunctionBegin; 419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 424 { 425 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 426 PetscErrorCode ierr; 427 PetscInt i,len,*lrows; 428 429 PetscFunctionBegin; 430 /* get locally owned rows */ 431 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 432 /* fix right hand side if needed */ 433 if (x && b) { 434 const PetscScalar *xx; 435 PetscScalar *bb; 436 437 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 438 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 439 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 440 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 441 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 442 } 443 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 444 if (diag != 0.0) { 445 Vec d; 446 447 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 448 ierr = VecSet(d,diag);CHKERRQ(ierr); 449 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 450 ierr = VecDestroy(&d);CHKERRQ(ierr); 451 } 452 ierr = PetscFree(lrows);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 460 461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 PetscErrorCode ierr; 465 const PetscScalar *ax; 466 PetscScalar *ay; 467 468 PetscFunctionBegin; 469 ierr = 
VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 470 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 471 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 472 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 473 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 474 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 475 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 480 { 481 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 482 PetscErrorCode ierr; 483 const PetscScalar *ax; 484 PetscScalar *ay; 485 486 PetscFunctionBegin; 487 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 488 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 489 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 490 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 491 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 492 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 493 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 498 { 499 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 500 PetscErrorCode ierr; 501 const PetscScalar *ax; 502 PetscScalar *ay; 503 504 PetscFunctionBegin; 505 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 506 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 507 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 508 ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 509 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 510 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 511 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 512 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 513 PetscFunctionReturn(0); 514 } 515 516 PetscErrorCode 
MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 517 { 518 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 519 PetscErrorCode ierr; 520 const PetscScalar *ax; 521 PetscScalar *ay; 522 523 PetscFunctionBegin; 524 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 525 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 526 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 527 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 528 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 529 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 530 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 531 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 536 { 537 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 538 PetscErrorCode ierr; 539 PetscInt lda,len,i,n,m = A->rmap->n,radd; 540 PetscScalar *x,zero = 0.0; 541 const PetscScalar *av; 542 543 PetscFunctionBegin; 544 ierr = VecSet(v,zero);CHKERRQ(ierr); 545 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 546 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 547 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 548 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 549 radd = A->rmap->rstart*m; 550 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 551 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 552 for (i=0; i<len; i++) { 553 x[i] = av[radd + i*lda + i]; 554 } 555 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 556 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 PetscErrorCode MatDestroy_MPIDense(Mat mat) 561 { 562 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 563 PetscErrorCode ierr; 564 565 PetscFunctionBegin; 566 #if defined(PETSC_USE_LOG) 567 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 568 #endif 569 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 570 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 571 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 572 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 573 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 574 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 575 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 576 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 577 578 ierr = PetscFree(mat->data);CHKERRQ(ierr); 579 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 580 581 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 592 #if defined(PETSC_HAVE_ELEMENTAL) 593 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 594 #endif 595 #if defined(PETSC_HAVE_SCALAPACK) 596 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 597 #endif 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 601 #if defined (PETSC_HAVE_CUDA) 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 604 #endif 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 608 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 609 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 610 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 611 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 612 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 613 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 614 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 619 620 #include <petscdraw.h> 621 static PetscErrorCode 
MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 622 { 623 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 624 PetscErrorCode ierr; 625 PetscMPIInt rank; 626 PetscViewerType vtype; 627 PetscBool iascii,isdraw; 628 PetscViewer sviewer; 629 PetscViewerFormat format; 630 631 PetscFunctionBegin; 632 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 633 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 634 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 635 if (iascii) { 636 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 637 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 638 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 639 MatInfo info; 640 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 641 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 642 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 643 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 644 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 645 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } else if (format == PETSC_VIEWER_ASCII_INFO) { 648 PetscFunctionReturn(0); 649 } 650 } else if (isdraw) { 651 PetscDraw draw; 652 PetscBool isnull; 653 654 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 655 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 656 if (isnull) PetscFunctionReturn(0); 657 } 658 659 { 660 /* assemble the entire matrix onto first processor. 
*/ 661 Mat A; 662 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 663 PetscInt *cols; 664 PetscScalar *vals; 665 666 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 667 if (!rank) { 668 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 669 } else { 670 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 671 } 672 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 673 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 674 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 675 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 676 677 /* Copy the matrix ... This isn't the most efficient means, 678 but it's quick for now */ 679 A->insertmode = INSERT_VALUES; 680 681 row = mat->rmap->rstart; 682 m = mdn->A->rmap->n; 683 for (i=0; i<m; i++) { 684 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 685 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 686 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 687 row++; 688 } 689 690 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 691 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 692 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 693 if (!rank) { 694 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 695 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 696 } 697 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 698 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 699 ierr = MatDestroy(&A);CHKERRQ(ierr); 700 } 701 PetscFunctionReturn(0); 702 } 703 704 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 705 { 706 PetscErrorCode ierr; 707 PetscBool iascii,isbinary,isdraw,issocket; 708 709 PetscFunctionBegin; 710 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 711 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 712 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 713 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 714 715 if (iascii || issocket || isdraw) { 716 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 717 } else if (isbinary) { 718 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 724 { 725 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 726 Mat mdn = mat->A; 727 PetscErrorCode ierr; 728 PetscLogDouble isend[5],irecv[5]; 729 730 PetscFunctionBegin; 731 info->block_size = 1.0; 732 733 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 734 735 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 736 isend[3] = info->memory; isend[4] = info->mallocs; 737 if (flag == MAT_LOCAL) { 738 info->nz_used = isend[0]; 739 info->nz_allocated = isend[1]; 740 info->nz_unneeded = isend[2]; 741 info->memory = isend[3]; 742 info->mallocs = isend[4]; 743 } else if (flag == MAT_GLOBAL_MAX) { 744 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 745 746 info->nz_used = irecv[0]; 747 info->nz_allocated = irecv[1]; 748 info->nz_unneeded = irecv[2]; 749 info->memory = irecv[3]; 750 info->mallocs = irecv[4]; 751 } else if (flag == MAT_GLOBAL_SUM) { 752 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 753 754 info->nz_used = irecv[0]; 755 info->nz_allocated = irecv[1]; 756 info->nz_unneeded = irecv[2]; 757 info->memory = irecv[3]; 758 info->mallocs = irecv[4]; 759 } 760 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky 
*/ 761 info->fill_ratio_needed = 0; 762 info->factor_mallocs = 0; 763 PetscFunctionReturn(0); 764 } 765 766 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 767 { 768 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 769 PetscErrorCode ierr; 770 771 PetscFunctionBegin; 772 switch (op) { 773 case MAT_NEW_NONZERO_LOCATIONS: 774 case MAT_NEW_NONZERO_LOCATION_ERR: 775 case MAT_NEW_NONZERO_ALLOCATION_ERR: 776 MatCheckPreallocated(A,1); 777 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 778 break; 779 case MAT_ROW_ORIENTED: 780 MatCheckPreallocated(A,1); 781 a->roworiented = flg; 782 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 783 break; 784 case MAT_NEW_DIAGONALS: 785 case MAT_KEEP_NONZERO_PATTERN: 786 case MAT_USE_HASH_TABLE: 787 case MAT_SORTED_FULL: 788 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 789 break; 790 case MAT_IGNORE_OFF_PROC_ENTRIES: 791 a->donotstash = flg; 792 break; 793 case MAT_SYMMETRIC: 794 case MAT_STRUCTURALLY_SYMMETRIC: 795 case MAT_HERMITIAN: 796 case MAT_SYMMETRY_ETERNAL: 797 case MAT_IGNORE_LOWER_TRIANGULAR: 798 case MAT_IGNORE_ZERO_ENTRIES: 799 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 800 break; 801 default: 802 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 803 } 804 PetscFunctionReturn(0); 805 } 806 807 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 808 { 809 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 810 const PetscScalar *l; 811 PetscScalar x,*v,*vv,*r; 812 PetscErrorCode ierr; 813 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 814 815 PetscFunctionBegin; 816 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 817 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 818 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 819 if (ll) { 820 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 821 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", 
s2a, s2); 822 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 823 for (i=0; i<m; i++) { 824 x = l[i]; 825 v = vv + i; 826 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 827 } 828 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 829 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 830 } 831 if (rr) { 832 const PetscScalar *ar; 833 834 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 835 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 836 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 837 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 838 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 839 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 840 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 841 for (i=0; i<n; i++) { 842 x = r[i]; 843 v = vv + i*lda; 844 for (j=0; j<m; j++) (*v++) *= x; 845 } 846 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 847 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 848 } 849 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 854 { 855 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 856 PetscErrorCode ierr; 857 PetscInt i,j; 858 PetscMPIInt size; 859 PetscReal sum = 0.0; 860 const PetscScalar *av,*v; 861 862 PetscFunctionBegin; 863 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 864 v = av; 865 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 866 if (size == 1) { 867 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 868 } else { 869 if (type == NORM_FROBENIUS) { 870 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 871 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 872 } 873 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 874 *nrm = PetscSqrtReal(*nrm); 875 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 876 } else if 
(type == NORM_1) {
      /* 1-norm = max column sum; columns span processes, so accumulate
         per-column local sums into a length-N array and reduce */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm: rows are local, so just max-reduce the local norms */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIDense - forms the transpose by inserting each local column
   of A as one global row of B (MatSetValues handles the redistribution).
   B's row index j is valid because the local dense matrix stores all N
   global columns of A.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;   /* global row indices owned locally */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;   /* advance to next column of the local array */
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's innards with B's */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* MatSetUp_MPIDense - finalizes the layouts and preallocates if the user has not */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y += alpha*X, delegated to the inner sequential matrices
   (assumes conforming layouts — checked by the Seq implementation) */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Complex-conjugates all entries; purely local, no communication needed */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keeps only the real part of every entry; purely local */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keeps only the imaginary part of every entry; purely local */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts column col into v by dispatching to the local matrix's op
   (columns are fully local, so no communication is required) */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

/*
   MatGetColumnNorms_MPIDense - per-column norms. Local norms are computed
   sequentially; NORM_2 values are squared before the SUM reduction and
   square-rooted after, NORM_INFINITY uses a MAX reduction.
*/
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];   /* sum of squares is additive across ranks */
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Hands out a CUDA vector aliasing column col of the local array (no copy);
   guarded so only one column/submatrix can be checked out at a time */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable aliasing vector (array pointer placed per call) */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;   /* stored 1-based so 0 means "not checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the column vector handed out by MatDenseGetColumnVec_MPIDenseCUDA() */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same aliasing scheme, plus a read lock on the vector */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Pops the read lock and detaches the aliased column */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: uses GetArrayWrite so no device sync of prior values is forced */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Replaces the device storage of the local matrix with a user array;
   forbidden while a column vector or submatrix is checked out */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Undoes MatDenseCUDAPlaceArray on the local matrix */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static
PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Thin pass-through wrappers: device-array access on the MPI matrix is
   simply access on the per-process sequential matrix l->A */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/*
   MatBindToCPU_MPIDenseCUDA - toggles between host and device execution.
   Swaps the column-vector accessor implementations accordingly and, when
   moving to the device, destroys any cached host-typed helper vec/mat so
   they get recreated with the CUDA types on next use.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* cached helpers of the wrong (host) type cannot be reused on device */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocates (or adopts d_data as) the device storage of the local matrix;
   silently a no-op when A is not of type MATMPIDENSECUDA */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local matrix holds all N global columns for the locally owned rows */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fills the matrix with random values by delegating to the local matrix */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Dense storage holds every entry, so a diagonal entry is never "missing".
   NOTE(review): the output index d is left untouched — confirm callers
   only read it when *missing is PETSC_TRUE */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Virtual-function table; positions are fixed by struct _MatOps, the
   numeric comments mark every fifth slot */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        NULL,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnNorms_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};

/* Creates (host or CUDA, matching the outer type) the local sequential matrix
   and preallocates it, optionally adopting the user-provided array `data` */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ?
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - converts to MATELEMENTAL by pushing every
   locally owned row into the Elemental matrix via MatSetValues.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Direct pointer access to one column of the local array (columns are local) */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCreateMPIMatConcatenateSeqMat_MPIDense - stacks each process's
   sequential dense matrix into one parallel dense matrix on comm.
   n is the desired local column count (PETSC_DECIDE to split).
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /* NOTE(review): nloc is fetched but never used afterwards */
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - converts a CUDA dense matrix back to a
   host dense matrix: rebinds to the CPU, restores the host vec type and type
   name, and clears the CUDA-only composed methods.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* composing NULL removes the CUDA-specific method implementations */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/*
   MatConvert_MPIDense_MPIDenseCUDA - converts a host dense matrix to the CUDA
   type: switches the default vec type, composes the CUDA array accessors and
   product hooks, converts the local matrix, and unbinds from the CPU.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Host version: hands out a vector aliasing column col of the local array;
   only one column/submatrix may be checked out at a time */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* NOTE(review): unlike the CUDA variant, cvec is not logged as a child of A here */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;   /* stored 1-based so 0 means "not checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the column vector handed out by MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only host variant of the column-vector accessor */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse +
(size_t)col * (size_t)lda);CHKERRQ(ierr); 1746 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1747 *v = a->cvec; 1748 PetscFunctionReturn(0); 1749 } 1750 1751 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1752 { 1753 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1754 PetscErrorCode ierr; 1755 1756 PetscFunctionBegin; 1757 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1758 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1759 a->vecinuse = 0; 1760 ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1761 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1762 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1763 *v = NULL; 1764 PetscFunctionReturn(0); 1765 } 1766 1767 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1768 { 1769 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1770 PetscErrorCode ierr; 1771 PetscInt lda; 1772 1773 PetscFunctionBegin; 1774 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1775 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1776 if (!a->cvec) { 1777 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1778 } 1779 a->vecinuse = col + 1; 1780 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1781 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1782 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1783 *v = a->cvec; 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1788 { 1789 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1790 PetscErrorCode ierr; 1791 1792 PetscFunctionBegin; 1793 if (!a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1794 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1795 a->vecinuse = 0; 1796 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1797 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1798 *v = NULL; 1799 PetscFunctionReturn(0); 1800 } 1801 1802 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1803 { 1804 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1805 Mat_MPIDense *c; 1806 PetscErrorCode ierr; 1807 MPI_Comm comm; 1808 PetscBool setup = PETSC_FALSE; 1809 1810 PetscFunctionBegin; 1811 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1812 if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1813 if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1814 if (!a->cmat) { 1815 setup = PETSC_TRUE; 1816 ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr); 1817 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr); 1818 ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1819 ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr); 1820 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1821 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1822 } else if (cend-cbegin != a->cmat->cmap->N) { 1823 setup = PETSC_TRUE; 1824 ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr); 1825 ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr); 1826 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1827 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1828 } 1829 c = (Mat_MPIDense*)a->cmat->data; 1830 if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1831 ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr); 1832 if 
(setup) { /* do we really need this? */ 1833 ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr); 1834 } 1835 a->cmat->preallocated = PETSC_TRUE; 1836 a->cmat->assembled = PETSC_TRUE; 1837 a->matinuse = cbegin + 1; 1838 *v = a->cmat; 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1843 { 1844 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1845 Mat_MPIDense *c; 1846 PetscErrorCode ierr; 1847 1848 PetscFunctionBegin; 1849 if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1850 if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1851 if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1852 a->matinuse = 0; 1853 c = (Mat_MPIDense*)a->cmat->data; 1854 ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr); 1855 *v = NULL; 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1860 { 1861 Mat_MPIDense *a; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1866 mat->data = (void*)a; 1867 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1868 1869 mat->insertmode = NOT_SET_VALUES; 1870 1871 /* build cache for off array entries formed */ 1872 a->donotstash = PETSC_FALSE; 1873 1874 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1875 1876 /* stuff used for matrix vector multiply */ 1877 a->lvec = NULL; 1878 a->Mvctx = NULL; 1879 a->roworiented = PETSC_TRUE; 1880 1881 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1882 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr); 1883 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1884 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1885 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1886 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1887 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 1888 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1889 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1890 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1891 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 1892 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1893 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1894 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1895 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1896 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1897 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1898 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr); 1899 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr); 1900 #if defined(PETSC_HAVE_ELEMENTAL) 1901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1902 #endif 1903 #if defined(PETSC_HAVE_SCALAPACK) 1904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr); 1905 #endif 1906 #if defined(PETSC_HAVE_CUDA) 1907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1908 #endif 1909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1912 #if defined(PETSC_HAVE_CUDA) 1913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1915 #endif 1916 1917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1919 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1920 
PetscFunctionReturn(0); 1921 } 1922 1923 /*MC 1924 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1925 1926 Options Database Keys: 1927 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1928 1929 Level: beginner 1930 1931 .seealso: 1932 1933 M*/ 1934 #if defined(PETSC_HAVE_CUDA) 1935 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1936 { 1937 PetscErrorCode ierr; 1938 1939 PetscFunctionBegin; 1940 ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr); 1941 ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1942 ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 #endif 1946 1947 /*MC 1948 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1949 1950 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1951 and MATMPIDENSE otherwise. 1952 1953 Options Database Keys: 1954 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1955 1956 Level: beginner 1957 1958 1959 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 1960 M*/ 1961 1962 /*MC 1963 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1964 1965 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1966 and MATMPIDENSECUDA otherwise. 1967 1968 Options Database Keys: 1969 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1970 1971 Level: beginner 1972 1973 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1974 M*/ 1975 1976 /*@C 1977 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1978 1979 Collective 1980 1981 Input Parameters: 1982 . B - the matrix 1983 - data - optional location of matrix data. 
Set data=NULL for PETSc 1984 to control all matrix memory allocation. 1985 1986 Notes: 1987 The dense format is fully compatible with standard Fortran 77 1988 storage by columns. 1989 1990 The data input variable is intended primarily for Fortran programmers 1991 who wish to allocate their own matrix memory space. Most users should 1992 set data=NULL. 1993 1994 Level: intermediate 1995 1996 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1997 @*/ 1998 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1999 { 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2004 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 2005 PetscFunctionReturn(0); 2006 } 2007 2008 /*@ 2009 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 2010 array provided by the user. This is useful to avoid copying an array 2011 into a matrix 2012 2013 Not Collective 2014 2015 Input Parameters: 2016 + mat - the matrix 2017 - array - the array in column major order 2018 2019 Notes: 2020 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2021 freed when the matrix is destroyed. 
2022 2023 Level: developer 2024 2025 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2026 2027 @*/ 2028 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2029 { 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2034 ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2035 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2036 #if defined(PETSC_HAVE_CUDA) 2037 mat->offloadmask = PETSC_OFFLOAD_CPU; 2038 #endif 2039 PetscFunctionReturn(0); 2040 } 2041 2042 /*@ 2043 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2044 2045 Not Collective 2046 2047 Input Parameters: 2048 . mat - the matrix 2049 2050 Notes: 2051 You can only call this after a call to MatDensePlaceArray() 2052 2053 Level: developer 2054 2055 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2056 2057 @*/ 2058 PetscErrorCode MatDenseResetArray(Mat mat) 2059 { 2060 PetscErrorCode ierr; 2061 2062 PetscFunctionBegin; 2063 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2064 ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2065 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2066 PetscFunctionReturn(0); 2067 } 2068 2069 /*@ 2070 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2071 array provided by the user. This is useful to avoid copying an array 2072 into a matrix 2073 2074 Not Collective 2075 2076 Input Parameters: 2077 + mat - the matrix 2078 - array - the array in column major order 2079 2080 Notes: 2081 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2082 freed by the user. It will be freed when the matrix is destroyed. 
2083 2084 Level: developer 2085 2086 .seealso: MatDenseGetArray(), VecReplaceArray() 2087 @*/ 2088 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2089 { 2090 PetscErrorCode ierr; 2091 2092 PetscFunctionBegin; 2093 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2094 ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2095 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2096 #if defined(PETSC_HAVE_CUDA) 2097 mat->offloadmask = PETSC_OFFLOAD_CPU; 2098 #endif 2099 PetscFunctionReturn(0); 2100 } 2101 2102 #if defined(PETSC_HAVE_CUDA) 2103 /*@C 2104 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2105 array provided by the user. This is useful to avoid copying an array 2106 into a matrix 2107 2108 Not Collective 2109 2110 Input Parameters: 2111 + mat - the matrix 2112 - array - the array in column major order 2113 2114 Notes: 2115 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2116 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2117 2118 Level: developer 2119 2120 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 2121 @*/ 2122 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2123 { 2124 PetscErrorCode ierr; 2125 2126 PetscFunctionBegin; 2127 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2128 ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2129 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2130 mat->offloadmask = PETSC_OFFLOAD_GPU; 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /*@C 2135 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2136 2137 Not Collective 2138 2139 Input Parameters: 2140 . 
mat - the matrix 2141 2142 Notes: 2143 You can only call this after a call to MatDenseCUDAPlaceArray() 2144 2145 Level: developer 2146 2147 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2148 2149 @*/ 2150 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2151 { 2152 PetscErrorCode ierr; 2153 2154 PetscFunctionBegin; 2155 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2156 ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2157 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 /*@C 2162 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2163 array provided by the user. This is useful to avoid copying an array 2164 into a matrix 2165 2166 Not Collective 2167 2168 Input Parameters: 2169 + mat - the matrix 2170 - array - the array in column major order 2171 2172 Notes: 2173 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2174 The memory passed in CANNOT be freed by the user. It will be freed 2175 when the matrix is destroyed. The array should respect the matrix leading dimension. 2176 2177 Level: developer 2178 2179 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2180 @*/ 2181 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2182 { 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2187 ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2188 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2189 mat->offloadmask = PETSC_OFFLOAD_GPU; 2190 PetscFunctionReturn(0); 2191 } 2192 2193 /*@C 2194 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2195 2196 Not Collective 2197 2198 Input Parameters: 2199 . A - the matrix 2200 2201 Output Parameters 2202 . 
array - the GPU array in column major order 2203 2204 Notes: 2205 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2206 2207 Level: developer 2208 2209 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2210 @*/ 2211 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2212 { 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2217 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2218 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 /*@C 2223 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2224 2225 Not Collective 2226 2227 Input Parameters: 2228 + A - the matrix 2229 - array - the GPU array in column major order 2230 2231 Notes: 2232 2233 Level: developer 2234 2235 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2236 @*/ 2237 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2238 { 2239 PetscErrorCode ierr; 2240 2241 PetscFunctionBegin; 2242 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2243 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2244 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2245 A->offloadmask = PETSC_OFFLOAD_GPU; 2246 PetscFunctionReturn(0); 2247 } 2248 2249 /*@C 2250 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2251 2252 Not Collective 2253 2254 Input Parameters: 2255 . A - the matrix 2256 2257 Output Parameters 2258 . array - the GPU array in column major order 2259 2260 Notes: 2261 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2262 2263 Level: developer 2264 2265 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2266 @*/ 2267 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2268 { 2269 PetscErrorCode ierr; 2270 2271 PetscFunctionBegin; 2272 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2273 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 /*@C 2278 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2279 2280 Not Collective 2281 2282 Input Parameters: 2283 + A - the matrix 2284 - array - the GPU array in column major order 2285 2286 Notes: 2287 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2288 2289 Level: developer 2290 2291 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2292 @*/ 2293 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2294 { 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 /*@C 2303 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2304 2305 Not Collective 2306 2307 Input Parameters: 2308 . A - the matrix 2309 2310 Output Parameters 2311 . array - the GPU array in column major order 2312 2313 Notes: 2314 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2315 2316 Level: developer 2317 2318 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2319 @*/ 2320 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2321 { 2322 PetscErrorCode ierr; 2323 2324 PetscFunctionBegin; 2325 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2326 ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2327 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2328 PetscFunctionReturn(0); 2329 } 2330 2331 /*@C 2332 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 
2333 2334 Not Collective 2335 2336 Input Parameters: 2337 + A - the matrix 2338 - array - the GPU array in column major order 2339 2340 Notes: 2341 2342 Level: developer 2343 2344 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2345 @*/ 2346 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2347 { 2348 PetscErrorCode ierr; 2349 2350 PetscFunctionBegin; 2351 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2352 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2353 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2354 A->offloadmask = PETSC_OFFLOAD_GPU; 2355 PetscFunctionReturn(0); 2356 } 2357 #endif 2358 2359 /*@C 2360 MatCreateDense - Creates a matrix in dense format. 2361 2362 Collective 2363 2364 Input Parameters: 2365 + comm - MPI communicator 2366 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2367 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2368 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2369 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2370 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2371 to control all matrix memory allocation. 2372 2373 Output Parameter: 2374 . A - the matrix 2375 2376 Notes: 2377 The dense format is fully compatible with standard Fortran 77 2378 storage by columns. 2379 2380 The data input variable is intended primarily for Fortran programmers 2381 who wish to allocate their own matrix memory space. Most users should 2382 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2383 2384 The user MUST specify either the local or global matrix dimensions 2385 (possibly both). 
2386 2387 Level: intermediate 2388 2389 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2390 @*/ 2391 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2392 { 2393 PetscErrorCode ierr; 2394 PetscMPIInt size; 2395 2396 PetscFunctionBegin; 2397 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2398 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2399 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2400 if (size > 1) { 2401 PetscBool havedata = (PetscBool)!!data; 2402 2403 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2404 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2405 ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 2406 if (havedata) { /* user provided data array, so no need to assemble */ 2407 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2408 (*A)->assembled = PETSC_TRUE; 2409 } 2410 } else { 2411 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2412 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2413 } 2414 PetscFunctionReturn(0); 2415 } 2416 2417 #if defined(PETSC_HAVE_CUDA) 2418 /*@C 2419 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2420 2421 Collective 2422 2423 Input Parameters: 2424 + comm - MPI communicator 2425 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2426 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2427 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2428 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2429 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2430 to control matrix memory allocation. 2431 2432 Output Parameter: 2433 . 
A - the matrix 2434 2435 Notes: 2436 2437 Level: intermediate 2438 2439 .seealso: MatCreate(), MatCreateDense() 2440 @*/ 2441 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2442 { 2443 PetscErrorCode ierr; 2444 PetscMPIInt size; 2445 2446 PetscFunctionBegin; 2447 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2448 PetscValidLogicalCollectiveBool(*A,!!data,6); 2449 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2450 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2451 if (size > 1) { 2452 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2453 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2454 if (data) { /* user provided data array, so no need to assemble */ 2455 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2456 (*A)->assembled = PETSC_TRUE; 2457 } 2458 } else { 2459 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2460 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2461 } 2462 PetscFunctionReturn(0); 2463 } 2464 #endif 2465 2466 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2467 { 2468 Mat mat; 2469 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2470 PetscErrorCode ierr; 2471 2472 PetscFunctionBegin; 2473 *newmat = NULL; 2474 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2475 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2476 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2477 a = (Mat_MPIDense*)mat->data; 2478 2479 mat->factortype = A->factortype; 2480 mat->assembled = PETSC_TRUE; 2481 mat->preallocated = PETSC_TRUE; 2482 2483 mat->insertmode = NOT_SET_VALUES; 2484 a->donotstash = oldmat->donotstash; 2485 2486 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 2487 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 2488 2489 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2490 ierr = 
/* Duplicate an MPIDENSE matrix: copy the layouts and flags of A, then duplicate
   the inner sequential matrix according to cpvalues. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  /* the duplicate starts out fully assembled/preallocated like the source */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference) the layouts instead of rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPIDENSE matrix from a binary or (if available) HDF5 viewer. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Compare the local blocks of A and B, then reduce with logical AND so every
   rank returns the same global answer in *flag. */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Destructor for the product data attached to C = A^T*B (dense-dense). */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for the product data attached to C = A*B^T (dense-dense). */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T*B: each rank computes aseq^T*bseq on its local
   row block, then the partial products are summed and distributed to C's row
   owners with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq; atb->atb is kept for reuse on later calls */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, pack the rows it
     owns, column by column (atbarray is column-major with leading dimension cM) */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Symbolic phase of C = A^T*B: set up C's sizes/type and allocate the
   communication buffers (sendbuf, recvcounts) reused by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to have identical row ownership */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* size_t cast guards the cM*cN product against PetscInt overflow */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives its full row block of C (local rows times cN columns) */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
/* Symbolic phase of C = A*B^T: set up C and allocate the exchange buffers for
   either the "allgatherv" (alg 0) or "cyclic" (alg 1) numeric algorithm. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic point-to-point traffic from colliding
     with other messages on this communicator */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — double buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A*B^T, cyclic variant: rotate row blocks of B^T around
   the ring of ranks, overlapping each nonblocking exchange with a local GEMM
   against the block currently held. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: send straight from the matrix array */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact the local block of B (stripping the lda padding) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
empty"); 2803 abt = (Mat_MatTransMultDense*)C->product->data; 2804 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2805 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2806 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2807 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2808 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2809 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2810 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2811 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2812 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2813 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2814 /* copy transpose of B into buf[0] */ 2815 bn = B->rmap->n; 2816 sendbuf = abt->buf[0]; 2817 recvbuf = abt->buf[1]; 2818 for (k = 0, j = 0; j < bn; j++) { 2819 for (i = 0; i < cK; i++, k++) { 2820 sendbuf[k] = bv[i * blda + j]; 2821 } 2822 } 2823 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2824 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2825 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2826 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2827 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2828 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2829 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2830 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2831 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2832 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2838 { 2839 Mat_MatTransMultDense *abt; 2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 MatCheckProduct(C,3); 2844 if (!C->product->data) 
/* Dispatch the numeric C = A*B^T to the algorithm chosen at symbolic time. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

/* Destructor for the product data attached to C = A*B (Elemental path). */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B via Elemental: refresh the Elemental copies of A
   and B, multiply there, and convert the result back into the dense C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
and B */ 2897 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2898 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2899 } 2900 2901 /* create elemental matrices Ae and Be */ 2902 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2903 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2904 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2905 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2906 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2907 2908 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2909 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2910 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2911 ierr = MatSetUp(Be);CHKERRQ(ierr); 2912 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2913 2914 /* compute symbolic Ce = Ae*Be */ 2915 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2916 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2917 2918 /* setup C */ 2919 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2920 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2921 ierr = MatSetUp(C);CHKERRQ(ierr); 2922 2923 /* create data structure for reuse Cdense */ 2924 ierr = PetscNew(&ab);CHKERRQ(ierr); 2925 ab->Ae = Ae; 2926 ab->Be = Be; 2927 ab->Ce = Ce; 2928 2929 C->product->data = ab; 2930 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2931 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2932 PetscFunctionReturn(0); 2933 } 2934 #endif 2935 /* ----------------------------------------------- */ 2936 #if defined(PETSC_HAVE_ELEMENTAL) 2937 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2938 { 2939 PetscFunctionBegin; 2940 
/* Install the Elemental-backed A*B product kernels on C. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Install the A^T*B product kernels on C after verifying that A and B share
   the same row ownership (required by the reduce-scatter algorithm). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}
/* Install the A*B^T product kernels on C and let the user pick the exchange
   algorithm ("allgatherv" or "cyclic") from the options database. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg  = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on which API created the product */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Entry point wiring the supported dense-dense product types; product types
   without a dense kernel fall through silently so other handlers can apply. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}