1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else { 36 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); 37 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 47 PetscErrorCode ierr; 48 49 PetscFunctionBegin; 50 ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 57 PetscErrorCode ierr; 58 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 59 60 PetscFunctionBegin; 61 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 62 lrow = row - rstart; 63 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 68 { 69 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 70 PetscErrorCode ierr; 71 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 72 73 PetscFunctionBegin; 74 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 75 lrow = row - rstart; 76 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 81 { 82 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 83 PetscErrorCode ierr; 84 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 85 PetscScalar *array; 86 MPI_Comm comm; 87 PetscBool flg; 88 Mat B; 89 90 PetscFunctionBegin; 91 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 92 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 93 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 94 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 95 96 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 97 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 98 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 99 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 100 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 101 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 102 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 103 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 104 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 105 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 106 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 107 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 108 *a = B; 109 ierr = MatDestroy(&B);CHKERRQ(ierr); 110 } else *a = B; 111 PetscFunctionReturn(0); 112 } 113 114 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 115 { 116 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 117 PetscErrorCode ierr; 118 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 119 PetscBool roworiented = A->roworiented; 120 121 PetscFunctionBegin; 122 for (i=0; i<m; i++) { 123 if (idxm[i] < 0) continue; 124 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 125 if (idxm[i] >= rstart && idxm[i] < rend) { 126 row = idxm[i] - rstart; 127 if (roworiented) { 128 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 129 } else { 130 for (j=0; j<n; j++) { 131 if (idxn[j] < 0) continue; 132 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 133 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 134 } 135 } 136 } else if (!A->donotstash) { 137 mat->assembled = PETSC_FALSE; 138 if (roworiented) { 139 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 140 } else { 141 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 142 } 143 } 144 } 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 149 { 150 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 151 PetscErrorCode ierr; 152 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 153 154 PetscFunctionBegin; 155 for (i=0; i<m; i++) { 156 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 157 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 158 if (idxm[i] >= rstart && idxm[i] < rend) { 159 row = idxm[i] - rstart; 160 for (j=0; j<n; j++) { 161 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 162 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 163 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 164 } 165 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 166 } 167 PetscFunctionReturn(0); 168 } 169 170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 171 { 172 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 181 { 182 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 183 PetscBool iscuda; 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 if (!a->A) { 188 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 189 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 190 ierr = 
PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 191 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 192 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 193 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 194 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 195 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 196 } 197 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 219 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 230 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 235 { 236 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 if (a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 241 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 242 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 247 { 248 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 249 PetscErrorCode ierr; 250 251 PetscFunctionBegin; 252 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 253 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 254 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 259 { 260 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 265 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 266 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 271 { 272 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 273 PetscErrorCode ierr; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 284 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 285 if (comm_mat != comm_is) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 288 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 289 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 290 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 291 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 292 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! */ 297 298 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! 
*/ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 309 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 310 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 311 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 317 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 318 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 326 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 332 /* Free work space */ 333 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 334 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 335 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 360 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 371 { 372 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 373 PetscErrorCode ierr; 374 PetscInt nstash,reallocs; 375 376 PetscFunctionBegin; 377 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 378 379 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 380 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 381 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 386 { 387 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 388 PetscErrorCode ierr; 389 PetscInt i,*row,*col,flg,j,rstart,ncols; 390 PetscMPIInt n; 391 PetscScalar *val; 392 393 PetscFunctionBegin; 394 if (!mdn->donotstash && !mat->nooffprocentries) { 395 /* wait on receives */ 396 while (1) { 397 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 398 if (!flg) break; 399 400 for (i=0; i<n;) { 401 /* Now identify the consecutive vals belonging to the same row */ 402 for (j=i,rstart=row[j]; j<n; j++) { 403 if (row[j] != rstart) break; 404 } 405 if (j < n) ncols = j-i; 406 else ncols = n-i; 407 /* Now assemble all these values with a single function call */ 408 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 409 i = j; 410 } 411 } 412 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 413 } 414 415 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 417 418 if (!mat->was_assembled && mode == 
MAT_FINAL_ASSEMBLY) { 419 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 425 { 426 PetscErrorCode ierr; 427 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 428 429 PetscFunctionBegin; 430 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 437 PetscErrorCode ierr; 438 PetscInt i,len,*lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 443 /* fix right hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 449 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 450 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 451 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 452 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 453 } 454 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 455 if (diag != 0.0) { 456 Vec d; 457 458 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 459 ierr = VecSet(d,diag);CHKERRQ(ierr); 460 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecDestroy(&d);CHKERRQ(ierr); 462 } 463 ierr = PetscFree(lrows);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 const PetscScalar *ax; 477 PetscScalar *ay; 478 PetscMemType axmtype,aymtype; 479 
480 PetscFunctionBegin; 481 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 482 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 483 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr); 484 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr); 485 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 486 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 487 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 494 PetscErrorCode ierr; 495 const PetscScalar *ax; 496 PetscScalar *ay; 497 PetscMemType axmtype,aymtype; 498 499 PetscFunctionBegin; 500 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 501 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 502 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr); 503 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr); 504 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 505 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 506 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 513 PetscErrorCode ierr; 514 const PetscScalar *ax; 515 PetscScalar *ay; 516 PetscMemType axmtype,aymtype; 517 518 PetscFunctionBegin; 519 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 520 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 522 ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr); 523 ierr = 
PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 524 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 525 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 526 ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 PetscErrorCode ierr; 534 const PetscScalar *ax; 535 PetscScalar *ay; 536 PetscMemType axmtype,aymtype; 537 538 PetscFunctionBegin; 539 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 540 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 541 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 542 ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr); 543 ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 544 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 545 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 546 ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr); 547 PetscFunctionReturn(0); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 553 PetscErrorCode ierr; 554 PetscInt lda,len,i,n,m = A->rmap->n,radd; 555 PetscScalar *x,zero = 0.0; 556 const PetscScalar *av; 557 558 PetscFunctionBegin; 559 ierr = VecSet(v,zero);CHKERRQ(ierr); 560 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 561 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 562 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 563 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 564 radd = A->rmap->rstart*m; 565 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 566 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 567 for (i=0; i<len; i++) { 568 x[i] = av[radd + i*lda + i]; 569 } 570 ierr = 
MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 571 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatDestroy_MPIDense(Mat mat) 576 { 577 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 578 PetscErrorCode ierr; 579 580 PetscFunctionBegin; 581 #if defined(PETSC_USE_LOG) 582 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 583 #endif 584 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 585 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 586 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 587 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 588 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 589 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 590 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 591 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 592 593 ierr = PetscFree(mat->data);CHKERRQ(ierr); 594 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 595 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 604 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 608 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 609 #if defined(PETSC_HAVE_ELEMENTAL) 610 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 611 #endif 612 #if defined(PETSC_HAVE_SCALAPACK) 613 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 614 #endif 615 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 616 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 617 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 618 #if defined (PETSC_HAVE_CUDA) 619 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 620 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 621 #endif 622 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 623 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 624 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 625 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 626 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 627 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 628 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 630 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 631 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 636 637 #include <petscdraw.h> 638 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 639 { 640 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 641 PetscErrorCode ierr; 642 PetscMPIInt rank; 643 PetscViewerType vtype; 644 PetscBool iascii,isdraw; 645 PetscViewer sviewer; 646 PetscViewerFormat format; 647 648 PetscFunctionBegin; 649 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 650 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 651 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 652 if (iascii) { 653 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 654 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 655 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 656 MatInfo info; 657 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 658 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 659 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 660 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 661 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 662 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 663 
PetscFunctionReturn(0); 664 } else if (format == PETSC_VIEWER_ASCII_INFO) { 665 PetscFunctionReturn(0); 666 } 667 } else if (isdraw) { 668 PetscDraw draw; 669 PetscBool isnull; 670 671 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 672 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 673 if (isnull) PetscFunctionReturn(0); 674 } 675 676 { 677 /* assemble the entire matrix onto first processor. */ 678 Mat A; 679 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 680 PetscInt *cols; 681 PetscScalar *vals; 682 683 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 684 if (!rank) { 685 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 686 } else { 687 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 688 } 689 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 690 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 691 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 692 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 693 694 /* Copy the matrix ... 
This isn't the most efficient means, 695 but it's quick for now */ 696 A->insertmode = INSERT_VALUES; 697 698 row = mat->rmap->rstart; 699 m = mdn->A->rmap->n; 700 for (i=0; i<m; i++) { 701 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 702 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 703 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 704 row++; 705 } 706 707 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 709 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 715 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 716 ierr = MatDestroy(&A);CHKERRQ(ierr); 717 } 718 PetscFunctionReturn(0); 719 } 720 721 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 722 { 723 PetscErrorCode ierr; 724 PetscBool iascii,isbinary,isdraw,issocket; 725 726 PetscFunctionBegin; 727 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 728 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 729 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 731 732 if (iascii || issocket || isdraw) { 733 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 734 } else if (isbinary) { 735 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 741 { 742 Mat_MPIDense *mat = 
(Mat_MPIDense*)A->data; 743 Mat mdn = mat->A; 744 PetscErrorCode ierr; 745 PetscLogDouble isend[5],irecv[5]; 746 747 PetscFunctionBegin; 748 info->block_size = 1.0; 749 750 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 751 752 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 753 isend[3] = info->memory; isend[4] = info->mallocs; 754 if (flag == MAT_LOCAL) { 755 info->nz_used = isend[0]; 756 info->nz_allocated = isend[1]; 757 info->nz_unneeded = isend[2]; 758 info->memory = isend[3]; 759 info->mallocs = isend[4]; 760 } else if (flag == MAT_GLOBAL_MAX) { 761 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 762 763 info->nz_used = irecv[0]; 764 info->nz_allocated = irecv[1]; 765 info->nz_unneeded = irecv[2]; 766 info->memory = irecv[3]; 767 info->mallocs = irecv[4]; 768 } else if (flag == MAT_GLOBAL_SUM) { 769 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 770 771 info->nz_used = irecv[0]; 772 info->nz_allocated = irecv[1]; 773 info->nz_unneeded = irecv[2]; 774 info->memory = irecv[3]; 775 info->mallocs = irecv[4]; 776 } 777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 778 info->fill_ratio_needed = 0; 779 info->factor_mallocs = 0; 780 PetscFunctionReturn(0); 781 } 782 783 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 784 { 785 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 786 PetscErrorCode ierr; 787 788 PetscFunctionBegin; 789 switch (op) { 790 case MAT_NEW_NONZERO_LOCATIONS: 791 case MAT_NEW_NONZERO_LOCATION_ERR: 792 case MAT_NEW_NONZERO_ALLOCATION_ERR: 793 MatCheckPreallocated(A,1); 794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 795 break; 796 case MAT_ROW_ORIENTED: 797 MatCheckPreallocated(A,1); 798 a->roworiented = flg; 799 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 800 break; 801 case MAT_FORCE_DIAGONAL_ENTRIES: 802 case 
MAT_KEEP_NONZERO_PATTERN: 803 case MAT_USE_HASH_TABLE: 804 case MAT_SORTED_FULL: 805 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 806 break; 807 case MAT_IGNORE_OFF_PROC_ENTRIES: 808 a->donotstash = flg; 809 break; 810 case MAT_SYMMETRIC: 811 case MAT_STRUCTURALLY_SYMMETRIC: 812 case MAT_HERMITIAN: 813 case MAT_SYMMETRY_ETERNAL: 814 case MAT_IGNORE_LOWER_TRIANGULAR: 815 case MAT_IGNORE_ZERO_ENTRIES: 816 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 817 break; 818 default: 819 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 820 } 821 PetscFunctionReturn(0); 822 } 823 824 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 825 { 826 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 827 const PetscScalar *l; 828 PetscScalar x,*v,*vv,*r; 829 PetscErrorCode ierr; 830 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 831 832 PetscFunctionBegin; 833 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 834 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 835 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 836 if (ll) { 837 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 838 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 839 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 840 for (i=0; i<m; i++) { 841 x = l[i]; 842 v = vv + i; 843 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 844 } 845 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 846 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 847 } 848 if (rr) { 849 const PetscScalar *ar; 850 851 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 852 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 853 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 854 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 855 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr); 856 
ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr); 857 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 858 for (i=0; i<n; i++) { 859 x = r[i]; 860 v = vv + i*lda; 861 for (j=0; j<m; j++) (*v++) *= x; 862 } 863 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 864 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 865 } 866 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 871 { 872 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 873 PetscErrorCode ierr; 874 PetscInt i,j; 875 PetscMPIInt size; 876 PetscReal sum = 0.0; 877 const PetscScalar *av,*v; 878 879 PetscFunctionBegin; 880 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 881 v = av; 882 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 883 if (size == 1) { 884 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 885 } else { 886 if (type == NORM_FROBENIUS) { 887 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 888 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 889 } 890 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 891 *nrm = PetscSqrtReal(*nrm); 892 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 893 } else if (type == NORM_1) { 894 PetscReal *tmp,*tmp2; 895 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 896 *nrm = 0.0; 897 v = av; 898 for (j=0; j<mdn->A->cmap->n; j++) { 899 for (i=0; i<mdn->A->rmap->n; i++) { 900 tmp[j] += PetscAbsScalar(*v); v++; 901 } 902 } 903 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 904 for (j=0; j<A->cmap->N; j++) { 905 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 906 } 907 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 908 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 909 } else if (type == NORM_INFINITY) { /* max row norm */ 910 PetscReal ntemp; 911 
ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 912 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 913 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 914 } 915 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 920 { 921 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 922 Mat B; 923 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 924 PetscErrorCode ierr; 925 PetscInt j,i,lda; 926 PetscScalar *v; 927 928 PetscFunctionBegin; 929 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 930 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 931 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 932 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 933 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 934 } else B = *matout; 935 936 m = a->A->rmap->n; n = a->A->cmap->n; 937 ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 938 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 939 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 940 for (i=0; i<m; i++) rwork[i] = rstart + i; 941 for (j=0; j<n; j++) { 942 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 943 v += lda; 944 } 945 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 946 ierr = PetscFree(rwork);CHKERRQ(ierr); 947 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 948 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 949 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 950 *matout = B; 951 } else { 952 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 953 } 954 PetscFunctionReturn(0); 955 } 956 957 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 958 PETSC_INTERN PetscErrorCode 
MatScale_MPIDense(Mat,PetscScalar); 959 960 PetscErrorCode MatSetUp_MPIDense(Mat A) 961 { 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 966 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 967 if (!A->preallocated) { 968 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 969 } 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 974 { 975 PetscErrorCode ierr; 976 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 977 978 PetscFunctionBegin; 979 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatConjugate_MPIDense(Mat mat) 984 { 985 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = MatConjugate(a->A);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 PetscErrorCode MatRealPart_MPIDense(Mat A) 994 { 995 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 996 PetscErrorCode ierr; 997 998 PetscFunctionBegin; 999 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1004 { 1005 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 1014 { 1015 PetscErrorCode ierr; 1016 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1017 1018 PetscFunctionBegin; 1019 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 1020 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 1021 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 PETSC_INTERN PetscErrorCode 
MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1026 1027 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1028 { 1029 PetscErrorCode ierr; 1030 PetscInt i,n; 1031 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1032 PetscReal *work; 1033 1034 PetscFunctionBegin; 1035 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1036 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1037 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1038 if (type == NORM_2) { 1039 for (i=0; i<n; i++) work[i] *= work[i]; 1040 } 1041 if (type == NORM_INFINITY) { 1042 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRMPI(ierr); 1043 } else { 1044 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRMPI(ierr); 1045 } 1046 ierr = PetscFree(work);CHKERRQ(ierr); 1047 if (type == NORM_2) { 1048 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1049 } 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #if defined(PETSC_HAVE_CUDA) 1054 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1055 { 1056 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1057 PetscErrorCode ierr; 1058 PetscInt lda; 1059 1060 PetscFunctionBegin; 1061 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1062 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1063 if (!a->cvec) { 1064 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1065 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1066 } 1067 a->vecinuse = col + 1; 1068 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1069 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1070 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1071 *v = a->cvec; 1072 
PetscFunctionReturn(0); 1073 } 1074 1075 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1076 { 1077 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1078 PetscErrorCode ierr; 1079 1080 PetscFunctionBegin; 1081 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1082 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1083 a->vecinuse = 0; 1084 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1085 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1086 *v = NULL; 1087 PetscFunctionReturn(0); 1088 } 1089 1090 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1091 { 1092 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1093 PetscInt lda; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1098 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1099 if (!a->cvec) { 1100 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1101 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1102 } 1103 a->vecinuse = col + 1; 1104 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1105 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1106 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1107 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1108 *v = a->cvec; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1113 { 1114 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 if (!a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1119 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1120 a->vecinuse = 0; 1121 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1122 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1123 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1124 *v = NULL; 1125 PetscFunctionReturn(0); 1126 } 1127 1128 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1129 { 1130 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1131 PetscErrorCode ierr; 1132 PetscInt lda; 1133 1134 PetscFunctionBegin; 1135 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1136 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1137 if (!a->cvec) { 1138 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1139 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 1140 } 1141 a->vecinuse = col + 1; 1142 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1143 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1144 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1145 *v = a->cvec; 1146 PetscFunctionReturn(0); 1147 } 1148 1149 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1150 { 1151 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1156 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1157 a->vecinuse = 0; 1158 ierr = 
MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1159 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1160 *v = NULL; 1161 PetscFunctionReturn(0); 1162 } 1163 1164 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1165 { 1166 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1171 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1172 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1177 { 1178 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1179 PetscErrorCode ierr; 1180 1181 PetscFunctionBegin; 1182 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1183 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1184 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1189 { 1190 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1191 PetscErrorCode ierr; 1192 1193 PetscFunctionBegin; 1194 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1195 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1196 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1201 { 1202 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1203 
PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1211 { 1212 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1213 PetscErrorCode ierr; 1214 1215 PetscFunctionBegin; 1216 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1221 { 1222 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1231 { 1232 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1241 { 1242 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1243 PetscErrorCode ierr; 1244 1245 PetscFunctionBegin; 1246 ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1251 { 1252 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1261 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1262 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1263 static PetscErrorCode 
MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1264 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1265 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1266 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1267 1268 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1269 { 1270 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1275 if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1276 if (d->A) { 1277 ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1278 } 1279 mat->boundtocpu = bind; 1280 if (!bind) { 1281 PetscBool iscuda; 1282 1283 ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1284 if (!iscuda) { 1285 ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1286 } 1287 ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1288 if (!iscuda) { 1289 ierr = MatDestroy(&d->cmat);CHKERRQ(ierr); 1290 } 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1296 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1297 } else { 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1304 } 1305 if (d->cmat) { 1306 ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr); 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1312 { 1313 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1314 PetscErrorCode ierr; 1315 PetscBool iscuda; 1316 1317 PetscFunctionBegin; 1318 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1319 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1320 if (!iscuda) PetscFunctionReturn(0); 1321 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1322 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1323 if (!d->A) { 1324 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1325 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1326 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1327 } 1328 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1329 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1330 
A->preallocated = PETSC_TRUE; 1331 PetscFunctionReturn(0); 1332 } 1333 #endif 1334 1335 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1336 { 1337 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1338 PetscErrorCode ierr; 1339 1340 PetscFunctionBegin; 1341 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1346 { 1347 PetscFunctionBegin; 1348 *missing = PETSC_FALSE; 1349 PetscFunctionReturn(0); 1350 } 1351 1352 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1353 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1354 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1355 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1356 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1357 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1358 1359 /* -------------------------------------------------------------------*/ 1360 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1361 MatGetRow_MPIDense, 1362 MatRestoreRow_MPIDense, 1363 MatMult_MPIDense, 1364 /* 4*/ MatMultAdd_MPIDense, 1365 MatMultTranspose_MPIDense, 1366 MatMultTransposeAdd_MPIDense, 1367 NULL, 1368 NULL, 1369 NULL, 1370 /* 10*/ NULL, 1371 NULL, 1372 NULL, 1373 NULL, 1374 MatTranspose_MPIDense, 1375 /* 15*/ MatGetInfo_MPIDense, 1376 MatEqual_MPIDense, 1377 MatGetDiagonal_MPIDense, 1378 MatDiagonalScale_MPIDense, 1379 MatNorm_MPIDense, 1380 /* 20*/ MatAssemblyBegin_MPIDense, 1381 MatAssemblyEnd_MPIDense, 1382 MatSetOption_MPIDense, 1383 MatZeroEntries_MPIDense, 1384 /* 24*/ MatZeroRows_MPIDense, 1385 NULL, 1386 NULL, 1387 NULL, 1388 NULL, 1389 /* 29*/ MatSetUp_MPIDense, 1390 NULL, 1391 NULL, 1392 MatGetDiagonalBlock_MPIDense, 1393 NULL, 1394 /* 34*/ MatDuplicate_MPIDense, 1395 NULL, 1396 NULL, 
1397 NULL, 1398 NULL, 1399 /* 39*/ MatAXPY_MPIDense, 1400 MatCreateSubMatrices_MPIDense, 1401 NULL, 1402 MatGetValues_MPIDense, 1403 MatCopy_MPIDense, 1404 /* 44*/ NULL, 1405 MatScale_MPIDense, 1406 MatShift_Basic, 1407 NULL, 1408 NULL, 1409 /* 49*/ MatSetRandom_MPIDense, 1410 NULL, 1411 NULL, 1412 NULL, 1413 NULL, 1414 /* 54*/ NULL, 1415 NULL, 1416 NULL, 1417 NULL, 1418 NULL, 1419 /* 59*/ MatCreateSubMatrix_MPIDense, 1420 MatDestroy_MPIDense, 1421 MatView_MPIDense, 1422 NULL, 1423 NULL, 1424 /* 64*/ NULL, 1425 NULL, 1426 NULL, 1427 NULL, 1428 NULL, 1429 /* 69*/ NULL, 1430 NULL, 1431 NULL, 1432 NULL, 1433 NULL, 1434 /* 74*/ NULL, 1435 NULL, 1436 NULL, 1437 NULL, 1438 NULL, 1439 /* 79*/ NULL, 1440 NULL, 1441 NULL, 1442 NULL, 1443 /* 83*/ MatLoad_MPIDense, 1444 NULL, 1445 NULL, 1446 NULL, 1447 NULL, 1448 NULL, 1449 /* 89*/ NULL, 1450 NULL, 1451 NULL, 1452 NULL, 1453 NULL, 1454 /* 94*/ NULL, 1455 NULL, 1456 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1457 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1458 NULL, 1459 /* 99*/ MatProductSetFromOptions_MPIDense, 1460 NULL, 1461 NULL, 1462 MatConjugate_MPIDense, 1463 NULL, 1464 /*104*/ NULL, 1465 MatRealPart_MPIDense, 1466 MatImaginaryPart_MPIDense, 1467 NULL, 1468 NULL, 1469 /*109*/ NULL, 1470 NULL, 1471 NULL, 1472 MatGetColumnVector_MPIDense, 1473 MatMissingDiagonal_MPIDense, 1474 /*114*/ NULL, 1475 NULL, 1476 NULL, 1477 NULL, 1478 NULL, 1479 /*119*/ NULL, 1480 NULL, 1481 NULL, 1482 NULL, 1483 NULL, 1484 /*124*/ NULL, 1485 MatGetColumnNorms_MPIDense, 1486 NULL, 1487 NULL, 1488 NULL, 1489 /*129*/ NULL, 1490 NULL, 1491 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1492 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1493 NULL, 1494 /*134*/ NULL, 1495 NULL, 1496 NULL, 1497 NULL, 1498 NULL, 1499 /*139*/ NULL, 1500 NULL, 1501 NULL, 1502 NULL, 1503 NULL, 1504 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1505 /*145*/ NULL, 1506 NULL, 1507 NULL 1508 }; 1509 1510 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat 
mat,PetscScalar *data) 1511 { 1512 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1513 PetscBool iscuda; 1514 PetscErrorCode ierr; 1515 1516 PetscFunctionBegin; 1517 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1518 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1519 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1520 if (!a->A) { 1521 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1522 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1523 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1524 } 1525 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1526 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1527 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1528 mat->preallocated = PETSC_TRUE; 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1533 { 1534 Mat B,C; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr); 1539 ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1540 ierr = MatDestroy(&C);CHKERRQ(ierr); 1541 if (reuse == MAT_REUSE_MATRIX) { 1542 C = *newmat; 1543 } else C = NULL; 1544 ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1545 ierr = MatDestroy(&B);CHKERRQ(ierr); 1546 if (reuse == MAT_INPLACE_MATRIX) { 1547 ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr); 1548 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1549 PetscFunctionReturn(0); 1550 } 1551 1552 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1553 { 1554 Mat B,C; 1555 PetscErrorCode ierr; 1556 1557 
PetscFunctionBegin; 1558 ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr); 1559 ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1560 if (reuse == MAT_REUSE_MATRIX) { 1561 C = *newmat; 1562 } else C = NULL; 1563 ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1564 ierr = MatDestroy(&B);CHKERRQ(ierr); 1565 if (reuse == MAT_INPLACE_MATRIX) { 1566 ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr); 1567 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #if defined(PETSC_HAVE_ELEMENTAL) 1572 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1573 { 1574 Mat mat_elemental; 1575 PetscErrorCode ierr; 1576 PetscScalar *v; 1577 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1578 1579 PetscFunctionBegin; 1580 if (reuse == MAT_REUSE_MATRIX) { 1581 mat_elemental = *newmat; 1582 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1583 } else { 1584 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1585 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1586 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1587 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1588 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1589 } 1590 1591 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1592 for (i=0; i<N; i++) cols[i] = i; 1593 for (i=0; i<m; i++) rows[i] = rstart + i; 1594 1595 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1596 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1597 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1598 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1599 ierr = 
MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1600 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1601 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1602 1603 if (reuse == MAT_INPLACE_MATRIX) { 1604 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1605 } else { 1606 *newmat = mat_elemental; 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 #endif 1611 1612 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1613 { 1614 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1623 { 1624 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1625 PetscErrorCode ierr; 1626 1627 PetscFunctionBegin; 1628 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1633 { 1634 PetscErrorCode ierr; 1635 Mat_MPIDense *mat; 1636 PetscInt m,nloc,N; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1640 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1641 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1642 PetscInt sum; 1643 1644 if (n == PETSC_DECIDE) { 1645 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1646 } 1647 /* Check sum(n) = N */ 1648 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 1649 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1650 1651 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1652 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1653 } 1654 1655 /* numeric phase */ 1656 mat = (Mat_MPIDense*)(*outmat)->data; 1657 
ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1658 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #if defined(PETSC_HAVE_CUDA) 1664 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1665 { 1666 Mat B; 1667 Mat_MPIDense *m; 1668 PetscErrorCode ierr; 1669 1670 PetscFunctionBegin; 1671 if (reuse == MAT_INITIAL_MATRIX) { 1672 ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1673 } else if (reuse == MAT_REUSE_MATRIX) { 1674 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1675 } 1676 1677 B = *newmat; 1678 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1679 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1680 ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1681 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr); 1685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1690 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* move the local block to the host and rebuild the scatter used by MatMult */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert a host MATMPIDENSE matrix to MATMPIDENSECUDA: installs the CUDA array
   accessors and product implementations and moves the local block to the GPU */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    /* move the local block to the GPU and rebuild the scatter used by MatMult */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Expose global column col as a parallel Vec without copying: the Vec borrows the
   column's storage from the local block (array placed, not duplicated) */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the wrapper Vec; its array is placed per call */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Return the Vec obtained with MatDenseGetColumnVec_MPIDense(): restores the local
   block's array and detaches the borrowed storage from the wrapper Vec */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); the returned Vec is read-locked */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1795 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1796 if (!a->cvec) { 1797 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1798 } 1799 a->vecinuse = col + 1; 1800 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1801 ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1802 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1803 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1804 *v = a->cvec; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1809 { 1810 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1811 PetscErrorCode ierr; 1812 1813 PetscFunctionBegin; 1814 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1815 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1816 a->vecinuse = 0; 1817 ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1818 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1819 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1820 *v = NULL; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1825 { 1826 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1827 PetscErrorCode ierr; 1828 PetscInt lda; 1829 1830 PetscFunctionBegin; 1831 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1832 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1833 if (!a->cvec) { 1834 ierr = 
VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1835 } 1836 a->vecinuse = col + 1; 1837 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1838 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1839 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1840 *v = a->cvec; 1841 PetscFunctionReturn(0); 1842 } 1843 1844 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1845 { 1846 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1847 PetscErrorCode ierr; 1848 1849 PetscFunctionBegin; 1850 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1851 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1852 a->vecinuse = 0; 1853 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1854 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1855 *v = NULL; 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1860 { 1861 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1862 Mat_MPIDense *c; 1863 PetscErrorCode ierr; 1864 MPI_Comm comm; 1865 PetscBool setup = PETSC_FALSE; 1866 1867 PetscFunctionBegin; 1868 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1869 if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1870 if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1871 if (!a->cmat) { 1872 setup = PETSC_TRUE; 1873 ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr); 1874 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr); 1875 ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1876 ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr); 1877 ierr = 
PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1878 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1879 } else if (cend-cbegin != a->cmat->cmap->N) { 1880 setup = PETSC_TRUE; 1881 ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr); 1882 ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr); 1883 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1884 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1885 } 1886 c = (Mat_MPIDense*)a->cmat->data; 1887 if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1888 ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr); 1889 if (setup) { /* do we really need this? */ 1890 ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr); 1891 } 1892 a->cmat->preallocated = PETSC_TRUE; 1893 a->cmat->assembled = PETSC_TRUE; 1894 a->matinuse = cbegin + 1; 1895 *v = a->cmat; 1896 PetscFunctionReturn(0); 1897 } 1898 1899 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1900 { 1901 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1902 Mat_MPIDense *c; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1907 if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1908 if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1909 a->matinuse = 0; 1910 c = (Mat_MPIDense*)a->cmat->data; 1911 ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr); 1912 *v = NULL; 1913 PetscFunctionReturn(0); 1914 } 1915 1916 /*MC 1917 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1918 1919 Options Database Keys: 1920 . 
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense()

M*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific methods are dispatched by name through PetscObjectComposeFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
.
-mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* fail early if no usable CUDA device is available */
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* the matrix values change back to the original array: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the new values live on the host; mark any GPU copy as stale */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed values live on the device; mark the host copy as stale */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement values live on the device; mark the host copy as stale */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2275 2276 Level: developer 2277 2278 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2279 @*/ 2280 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2281 { 2282 PetscErrorCode ierr; 2283 2284 PetscFunctionBegin; 2285 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2286 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2287 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 /*@C 2292 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2293 2294 Not Collective 2295 2296 Input Parameters: 2297 + A - the matrix 2298 - array - the GPU array in column major order 2299 2300 Notes: 2301 2302 Level: developer 2303 2304 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2305 @*/ 2306 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2307 { 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2312 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2313 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2314 A->offloadmask = PETSC_OFFLOAD_GPU; 2315 PetscFunctionReturn(0); 2316 } 2317 2318 /*@C 2319 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2320 2321 Not Collective 2322 2323 Input Parameters: 2324 . A - the matrix 2325 2326 Output Parameters 2327 . array - the GPU array in column major order 2328 2329 Notes: 2330 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatches to the type-specific implementation registered under this composed-function name */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* read-only restore: no PetscObjectStateIncrease() and no offloadmask change, unlike the writable variant below */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* caller may write through the returned pointer, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the GPU copy is now authoritative; CPU data must be re-fetched before host access */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    /* data may legitimately be NULL on ranks with no local rows; agree globally on whether anyone supplied it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data. Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* unlike MatCreateDense(), data must be collectively consistent here (all ranks NULL or all non-NULL) */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicate an MPI dense matrix: replicate layout/flags, duplicate the local dense block, and rebuild the scatter used by MatMult */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the row/column layouts rather than copying them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPI dense matrix from a binary or (optionally) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Equality test: compare the local blocks, then AND the result across the communicator */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}

/* Destroy callback for the A^T*B product cache created in MatTransposeMatMultSymbolic_MPIDense_MPIDense() */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy callback for the A*B^T product cache created in MatMatTransposeMultSymbolic_MPIDense_MPIDense() */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* C = A^T * B: each rank computes its local aseq^T*bseq (a full cM x cN matrix), then the
   row-blocks are summed onto their owners with MPI_Reduce_scatter */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: group the column-major data by destination rank's row block */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C; recvcounts[i] = (local rows of rank i) * cN */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A^T * B: size/type C and allocate the Reduce_scatter buffers kept in the product cache */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C; sendbuf holds the full global product (cM x cN) on every rank */
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A * B^T: size/type C and allocate the communication buffers for the
   chosen algorithm ("cyclic" ring exchange or "allgatherv") */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" -- buf[0] holds local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* C = A * B^T, "cyclic" algorithm: B's row blocks circulate around the ranks in a ring of
   nonblocking send/recv pairs, overlapping each transfer with the local GEMM on the block in hand */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* if the local B block is contiguous (lda == local rows) send it in place, else pack it densely */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr    = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr);
      ierr    = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr);
    }

    /* local aseq * sendbuf^T; result lands in the column block of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* C = A * B^T, "allgatherv" algorithm: every rank gathers the whole B^T and does one local GEMM */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av,*bv;
  PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bv[i * blda + j];
    }
  }
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  /* recvbuf holds global B^T in column-major with leading dimension ck, so a plain "N","N" GEMM applies */
  if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  /* NOTE(review): bv was already restored above, right before MPI_Allgatherv; this second
     MatDenseRestoreArrayRead(b->A,&bv) looks like a double restore -- confirm against the
     MatDenseGetArrayRead()/MatDenseRestoreArrayRead() pairing rules */
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Dispatch C = A * B^T to the algorithm selected in the symbolic phase (abt->alg) */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

/* Destroy callback for the Elemental-backed A*B product cache */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* C = A * B via Elemental: refresh the cached Elemental copies of A and B, multiply, convert back into C */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase for the Elemental-backed A*B: build Elemental shells Ae, Be, Ce and cache them on C */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  /* NOTE(review): the check compares A's COLUMN ownership range with B's row range (as A*B requires),
     but the error message prints A->rmap->rstart/rend -- the message likely should print A->cmap values */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
  ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Ae);CHKERRQ(ierr);
  ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
  ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Be);CHKERRQ(ierr);
  ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  /* compute symbolic Ce = Ae*Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
  ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);

  /* setup C */
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse Cdense */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#endif
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
/* Wire the AB product to the Elemental-backed symbolic routine */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Wire the AtB product after checking A and B share a row distribution */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

/* Wire the ABt product, letting the user pick the algorithm ("allgatherv" default, or "cyclic") at runtime */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg  = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; option name depends on whether the user came through the old API or MatProduct */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Entry point: route the requested product type to its MPIDense-specific setup; unsupported types fall through untouched */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}