/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      cong;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if (!cong) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
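/*
   A minimal usage sketch for MatDenseGetLocalMatrix() (illustrative only, not part of this
   file): the local matrix aliases the storage of the parallel matrix, so rank-local kernels
   can use it in place, and no matching "restore" call exists or is needed.

     Mat A,Aloc;
     ierr = MatCreateDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);  // Aloc is the rank-local MATSEQDENSE
     // ... operate on Aloc; modifications are visible through A ...
*/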
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
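/*
   The forwarding accessors above expose the rank-local storage in column-major (Fortran)
   order. A hedged sketch of the expected get/restore pairing, where lda is the leading
   dimension reported by MatDenseGetLDA() and may exceed the local row count:

     PetscScalar *a;
     PetscInt    lda;
     ierr = MatDenseGetArray(A,&a);CHKERRQ(ierr);
     ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
     a[j*lda + i] = v;                                 // local entry (i,j)
     ierr = MatDenseRestoreArray(A,&a);CHKERRQ(ierr);  // always pair a get with its restore
*/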
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
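/*
   The stash round-trip above is what makes the usual assembly idiom work for off-process
   entries; a sketch of the caller-side pattern (names illustrative):

     ierr = MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr); // grow may be owned elsewhere
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          // ships stashed entries
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);            // receives and applies them
*/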
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b,&bb);CHKERRQ(ierr);
    for (i=0; i<len; ++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b,&bb);CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* gather the distributed x into the local work vector lvec, then apply the local kernel */
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* apply the local transpose kernel, then sum the overlapping contributions back into yy */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          lda,len,i,n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  radd = A->rmap->rstart*lda; /* offset of the first diagonal column; must use lda, not the local row count */
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
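/*
   Index arithmetic used above, spelled out: local row i holds global row rstart+i, whose
   diagonal entry sits in global column rstart+i of the column-major local array, i.e. at

     av[(rstart + i)*lda + i] = av[rstart*lda + i*(lda + 1)]

   which is radd + i*lda + i with radd = rstart*lda.
*/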
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D",s2a,s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v += lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D",s3a,s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif
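/*
   A hedged usage sketch for the CUDA preallocation path above (only meaningful when PETSc is
   configured with CUDA; d_data is assumed here to be device memory obtained from cudaMalloc(),
   and passing NULL is assumed to let PETSc allocate the device buffer itself):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSECUDA);CHKERRQ(ierr);
     ierr = MatMPIDenseCUDASetPreallocation(A,d_data);CHKERRQ(ierr);
*/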
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
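/*
   Column access sketch (illustrative): since every rank stores all columns of its local row
   block, the column index passed to MatDenseGetColumn() is the global one; the pointer covers
   the local rows and must be returned with MatDenseRestoreColumn().

     PetscScalar *col;
     PetscInt    i;
     ierr = MatDenseGetColumn(A,j,&col);CHKERRQ(ierr);      // j is a global column index
     for (i=0; i<nlocal; i++) col[i] *= 2.0;                // scale the local part of column j
     ierr = MatDenseRestoreColumn(A,&col);CHKERRQ(ierr);
*/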
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-process entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MATMPIDENSE, MATSEQDENSE, MATSEQDENSECUDA
M*/
#if defined(PETSC_HAVE_CUDA)
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MATSEQDENSE, MATMPIDENSE
M*/
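/*
   Options-driven creation sketch for the types documented above (illustrative): the concrete
   type can be chosen at run time, e.g. with -mat_type dense or -mat_type mpidensecuda.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr); // honors -mat_type
     ierr = MatSetUp(A);CHKERRQ(ierr);          // triggers default preallocation if none was given
*/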

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATMPIDENSE, MATSEQDENSECUDA
M*/
#if defined(PETSC_HAVE_CUDA)
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
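
/*
   A hedged sketch of user-provided storage (illustrative only): for MATMPIDENSE
   the local block holds the m local rows of all N global columns in column-major
   order, so the user array needs m*N entries. The matrix does not take ownership;
   the caller frees the array after the matrix is destroyed.

     PetscScalar *data;
     ierr = PetscCalloc1(m*N,&data);CHKERRQ(ierr);         m = local rows, N = global columns (hypothetical)
     ierr = MatMPIDenseSetPreallocation(B,data);CHKERRQ(ierr);
*/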

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible
   for freeing this array; it will not be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
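
/*
   Typical place/reset pairing (illustrative only, assuming a column-major user
   buffer "work" of the right local size):

     ierr = MatDensePlaceArray(mat,work);CHKERRQ(ierr);
     ... operate on mat, reading and writing through "work" ...
     ierr = MatDenseResetArray(mat);CHKERRQ(ierr);

   The buffer remains owned by the caller and must outlive the placement.
*/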

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible
   for freeing this array; it will not be freed when the matrix is destroyed. The array must have been
   allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
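
/*
   Device-buffer placement sketch (illustrative only): the buffer must come from
   cudaMalloc() and stays owned by the caller; m*N is the hypothetical local size.

     PetscScalar *d_work;
     cudaMalloc((void**)&d_work,m*N*sizeof(PetscScalar));
     ierr = MatDenseCUDAPlaceArray(mat,d_work);CHKERRQ(ierr);
     ... operate on mat; its data now lives in d_work on the GPU ...
     ierr = MatDenseCUDAResetArray(mat);CHKERRQ(ierr);
     cudaFree(d_work);
*/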

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data,
   use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no
   longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
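
/*
   Write-only access pattern (illustrative only): suitable when the previous
   contents are irrelevant, e.g. before overwriting the matrix from a device kernel.

     PetscScalar *d_a;
     ierr = MatDenseCUDAGetArrayWrite(A,&d_a);CHKERRQ(ierr);
     ... launch a (hypothetical) kernel that fills d_a ...
     ierr = MatDenseCUDARestoreArrayWrite(A,&d_a);CHKERRQ(ierr);   marks the GPU copy as current
*/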

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   Since the access was read-only, the data is assumed unchanged: the object state is not increased and the offload mask is not modified.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif
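
/*
   Summary of the three access modes above (a reading aid, not library text):
   read-only access neither bumps the object state nor touches the offload mask;
   read/write and write-only access bump the state, and their restore calls mark
   the GPU copy as the current one.

     const PetscScalar *ro;
     ierr = MatDenseCUDAGetArrayRead(A,&ro);CHKERRQ(ierr);        inspect only
     ierr = MatDenseCUDARestoreArrayRead(A,&ro);CHKERRQ(ierr);
*/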

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
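
/*
   A minimal usage sketch (illustrative only): let PETSc decide the local split
   and manage the storage; M and N are hypothetical global sizes.

     Mat A;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd(), then use A ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/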

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
          to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,&ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
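
/*
   Loading sketch (illustrative only, assuming a hypothetical file "dense.dat"
   written earlier by MatView() with a binary viewer):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/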

PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = (*atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts = atb->recvcounts;
  PetscScalar           *carray,*sendbuf = atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN = C->cmap->N,cM = C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum the partial products into the local rows of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm = A->cmap->n,cM,cN = B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse of C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)C->data;
  c->atbdense  = atb;
  atb->destroy = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
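
/*
   Usage sketch (illustrative only): the two routines above implement C = A^T * B
   for MATMPIDENSE operands; each rank multiplies its local blocks and
   MPI_Reduce_scatter() sums the partial products onto the owner of each row block.

     Mat C;
     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/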

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse of C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows,(B->rmap->range[i+1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N,&(abt->buf[0]),B->rmap->N * B->cmap->N,&(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i+1] - abt->recvdispls[i];
    break;
  }

  c            = (Mat_MPIDense*)C->data;
  c->abtdense  = abt;
  abt->destroy = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,sendsiz,recvsiz,sendto,recvfrom,recvisfrom;
  PetscScalar           *sendbuf,*recvbuf=0,*cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm,cn,ck,alda,blda,clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, so the buffers can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the rank from which the next recvbuf originates */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr    = MPI_Isend(sendbuf,sendsiz,MPIU_SCALAR,sendto,abt->tag,comm,&reqs[0]);CHKERRQ(ierr);
      ierr    = MPI_Irecv(recvbuf,recvsiz,MPIU_SCALAR,recvfrom,abt->tag,comm,&reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom],&cn);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscScalar           *cv,*sendbuf,*recvbuf;
  const PetscScalar     *av,*bv;
  PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm,cn,ck,alda,clda;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  /* copy transpose of B into buf[0]; bv is restored here, once, since it is no longer needed */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bv[i * blda + j];
    }
  }
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MPI_Allgatherv(sendbuf,bn * cK,MPIU_SCALAR,recvbuf,abt->recvcounts,abt->recvdispls,MPIU_SCALAR,comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A,B,C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A,B,C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
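
/*
   Usage sketch (illustrative only): the routines above implement C = A * B^T for
   MATMPIDENSE operands, dispatching between the allgatherv and cyclic variants.

     Mat C;
     ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/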

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Ae);CHKERRQ(ierr);
  ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Ae);CHKERRQ(ierr);
  ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  ierr = MatCreate(PetscObjectComm((PetscObject)B),&Be);CHKERRQ(ierr);
  ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Be);CHKERRQ(ierr);
  ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  /* compute symbolic Ce = Ae*Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
  ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);

  /* setup C */
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse of C */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)C->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = C->ops->destroy;
  C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  C->ops->productnumeric = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
#endif
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->productnumeric  = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
  C->ops->productnumeric          = MatProductNumeric_AtB;
  C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg  = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  C->ops->productnumeric           = MatProductNumeric_ABt;
  PetscFunctionReturn(0);
}
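
/*
   Product-API sketch (illustrative only): driving the AtB path above through the
   MatProduct interface; the ABt algorithm can likewise be chosen at runtime with
   -matproduct_abt_mpidense_mpidense_via allgatherv|cyclic.

     Mat C;
     ierr = MatProductCreate(A,B,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_AtB);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);
*/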

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}