/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  /* the cast of A->data above is only dereferenced when the type check below succeeds */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

/* Fetch one row of the matrix; only rows owned by this process may be requested.
   The global row index is translated to a local row of the inner sequential matrix. */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart; /* local row index inside the inner sequential matrix */
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Companion to MatGetRow_MPIDense(); must be called with the same (global) row index. */
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 66 { 67 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 68 PetscErrorCode ierr; 69 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 70 PetscScalar *array; 71 MPI_Comm comm; 72 PetscBool flg; 73 Mat B; 74 75 PetscFunctionBegin; 76 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 77 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 78 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 79 if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */ 80 81 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 82 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 83 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 84 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 85 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 86 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 87 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 88 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 89 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 90 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 92 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 93 *a = B; 94 ierr = MatDestroy(&B);CHKERRQ(ierr); 95 } else *a = B; 96 PetscFunctionReturn(0); 97 } 98 99 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 100 { 101 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 102 PetscErrorCode ierr; 103 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 104 PetscBool roworiented = A->roworiented; 105 
106 PetscFunctionBegin; 107 for (i=0; i<m; i++) { 108 if (idxm[i] < 0) continue; 109 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 110 if (idxm[i] >= rstart && idxm[i] < rend) { 111 row = idxm[i] - rstart; 112 if (roworiented) { 113 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 114 } else { 115 for (j=0; j<n; j++) { 116 if (idxn[j] < 0) continue; 117 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 118 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 119 } 120 } 121 } else if (!A->donotstash) { 122 mat->assembled = PETSC_FALSE; 123 if (roworiented) { 124 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 125 } else { 126 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 127 } 128 } 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 134 { 135 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 136 PetscErrorCode ierr; 137 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 138 139 PetscFunctionBegin; 140 for (i=0; i<m; i++) { 141 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 142 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 for (j=0; j<n; j++) { 146 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 147 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 148 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 149 } 150 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 151 } 152 
PetscFunctionReturn(0); 153 } 154 155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 156 { 157 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 158 PetscErrorCode ierr; 159 160 PetscFunctionBegin; 161 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 166 { 167 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 168 PetscErrorCode ierr; 169 170 PetscFunctionBegin; 171 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 172 PetscFunctionReturn(0); 173 } 174 175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 176 { 177 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 192 PetscFunctionReturn(0); 193 } 194 195 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 196 { 197 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 198 PetscErrorCode ierr; 199 200 PetscFunctionBegin; 201 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 202 PetscFunctionReturn(0); 203 } 204 205 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 206 { 207 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 208 PetscErrorCode ierr; 209 210 PetscFunctionBegin; 211 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 212 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 217 { 218 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 219 PetscErrorCode ierr; 220 221 PetscFunctionBegin; 222 if (a->vecinuse) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 223 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 228 { 229 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 230 PetscErrorCode ierr; 231 232 PetscFunctionBegin; 233 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 234 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 235 PetscFunctionReturn(0); 236 } 237 238 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 239 { 240 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 241 PetscErrorCode ierr; 242 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 243 const PetscInt *irow,*icol; 244 const PetscScalar *v; 245 PetscScalar *bv; 246 Mat newmat; 247 IS iscol_local; 248 MPI_Comm comm_is,comm_mat; 249 250 PetscFunctionBegin; 251 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 252 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 253 if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 254 255 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 256 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 257 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 258 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 259 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 260 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 261 262 /* No parallel redistribution currently supported! Should really check each index set 263 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 264 original matrix! 
*/ 265 266 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 267 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 268 269 /* Check submatrix call */ 270 if (scall == MAT_REUSE_MATRIX) { 271 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 272 /* Really need to test rows and column sizes! */ 273 newmat = *B; 274 } else { 275 /* Create and fill new matrix */ 276 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 277 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 278 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 279 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 280 } 281 282 /* Now extract the data pointers and do the copy, column at a time */ 283 newmatd = (Mat_MPIDense*)newmat->data; 284 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 285 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 286 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 287 for (i=0; i<Ncols; i++) { 288 const PetscScalar *av = v + lda*icol[i]; 289 for (j=0; j<nrows; j++) { 290 *bv++ = av[irow[j] - rstart]; 291 } 292 } 293 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 294 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 295 296 /* Assemble the matrices so that the correct flags are set */ 297 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 298 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 299 300 /* Free work space */ 301 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 302 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 303 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 304 *B = newmat; 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 309 { 310 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 315 
PetscFunctionReturn(0); 316 } 317 318 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 319 { 320 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 321 PetscErrorCode ierr; 322 323 PetscFunctionBegin; 324 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 329 { 330 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 331 PetscErrorCode ierr; 332 333 PetscFunctionBegin; 334 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 339 { 340 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 341 PetscErrorCode ierr; 342 PetscInt nstash,reallocs; 343 344 PetscFunctionBegin; 345 if (mdn->donotstash || mat->nooffprocentries) return(0); 346 347 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 348 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 349 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 354 { 355 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 356 PetscErrorCode ierr; 357 PetscInt i,*row,*col,flg,j,rstart,ncols; 358 PetscMPIInt n; 359 PetscScalar *val; 360 361 PetscFunctionBegin; 362 if (!mdn->donotstash && !mat->nooffprocentries) { 363 /* wait on receives */ 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { 371 if (row[j] != rstart) break; 372 } 373 if (j < n) ncols = j-i; 374 else ncols = n-i; 375 /* Now assemble all these values with a single function call */ 376 ierr = 
MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 377 i = j; 378 } 379 } 380 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 381 } 382 383 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 384 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 385 386 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 387 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 388 } 389 PetscFunctionReturn(0); 390 } 391 392 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 393 { 394 PetscErrorCode ierr; 395 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 396 397 PetscFunctionBegin; 398 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 399 PetscFunctionReturn(0); 400 } 401 402 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 403 { 404 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 405 PetscErrorCode ierr; 406 PetscInt i,len,*lrows; 407 408 PetscFunctionBegin; 409 /* get locally owned rows */ 410 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 411 /* fix right hand side if needed */ 412 if (x && b) { 413 const PetscScalar *xx; 414 PetscScalar *bb; 415 416 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 417 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 418 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 419 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 420 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 421 } 422 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 423 if (diag != 0.0) { 424 Vec d; 425 426 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 427 ierr = VecSet(d,diag);CHKERRQ(ierr); 428 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecDestroy(&d);CHKERRQ(ierr); 430 } 431 ierr = PetscFree(lrows);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 435 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 436 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 437 PETSC_INTERN PetscErrorCode 
MatMultTranspose_SeqDense(Mat,Vec,Vec); 438 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 439 440 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 441 { 442 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 443 PetscErrorCode ierr; 444 const PetscScalar *ax; 445 PetscScalar *ay; 446 447 PetscFunctionBegin; 448 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 449 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 450 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 451 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 452 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 453 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 454 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 461 PetscErrorCode ierr; 462 const PetscScalar *ax; 463 PetscScalar *ay; 464 465 PetscFunctionBegin; 466 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 467 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 468 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 469 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 470 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 471 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 472 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 477 { 478 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 479 PetscErrorCode ierr; 480 const PetscScalar *ax; 481 PetscScalar *ay; 482 483 PetscFunctionBegin; 484 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 485 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 486 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 487 ierr = 
VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 488 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 489 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 490 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 491 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 496 { 497 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 498 PetscErrorCode ierr; 499 const PetscScalar *ax; 500 PetscScalar *ay; 501 502 PetscFunctionBegin; 503 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 504 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 505 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 506 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 507 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 508 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 509 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 510 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513 514 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 515 { 516 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 517 PetscErrorCode ierr; 518 PetscInt lda,len,i,n,m = A->rmap->n,radd; 519 PetscScalar *x,zero = 0.0; 520 const PetscScalar *av; 521 522 PetscFunctionBegin; 523 ierr = VecSet(v,zero);CHKERRQ(ierr); 524 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 525 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 526 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 527 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 528 radd = A->rmap->rstart*m; 529 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 530 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 531 for (i=0; i<len; i++) { 532 x[i] = av[radd + i*lda + i]; 533 } 534 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 
  PetscFunctionReturn(0);
}

/* Tears down the parallel dense matrix: stash, inner sequential matrix, work vectors,
   the communication SF, and every method composed on the object at creation time. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* clear all composed methods so a later MatSetType() starts from a clean object */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
/* Viewer for ASCII/draw/socket: info formats print per-rank summaries; otherwise the
   whole matrix is gathered onto rank 0 and viewed there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing extra to print for the plain info format */
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 now holds everything in its inner sequential matrix; view that */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatches to the viewer-type specific implementation. */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Reports local or globally reduced (max/sum) matrix statistics. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat =
(Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  /* start from the inner sequential matrix's local statistics */
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Handles the options meaningful for dense matrices; the rest are logged and ignored. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg; /* also forwarded so the inner matrix interprets v the same way */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* A = diag(ll)*A*diag(rr); rr is broadcast into the local work vector first. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    /* scale row i by l[i]; walk across the row with stride lda */
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    /* every process needs all of rr since it stores full rows */
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr =
PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 810 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 811 for (i=0; i<n; i++) { 812 x = r[i]; 813 v = vv + i*lda; 814 for (j=0; j<m; j++) (*v++) *= x; 815 } 816 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 817 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 818 } 819 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } 822 823 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 824 { 825 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 826 PetscErrorCode ierr; 827 PetscInt i,j; 828 PetscReal sum = 0.0; 829 const PetscScalar *av,*v; 830 831 PetscFunctionBegin; 832 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 833 v = av; 834 if (mdn->size == 1) { 835 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 836 } else { 837 if (type == NORM_FROBENIUS) { 838 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 839 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 840 } 841 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 842 *nrm = PetscSqrtReal(*nrm); 843 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 844 } else if (type == NORM_1) { 845 PetscReal *tmp,*tmp2; 846 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 847 *nrm = 0.0; 848 v = av; 849 for (j=0; j<mdn->A->cmap->n; j++) { 850 for (i=0; i<mdn->A->rmap->n; i++) { 851 tmp[j] += PetscAbsScalar(*v); v++; 852 } 853 } 854 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 855 for (j=0; j<A->cmap->N; j++) { 856 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 857 } 858 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 859 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 860 } else if (type == NORM_INFINITY) { /* max row norm */ 861 PetscReal ntemp; 862 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 863 ierr = 
MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 864 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 865 } 866 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 871 { 872 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 873 Mat B; 874 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 875 PetscErrorCode ierr; 876 PetscInt j,i,lda; 877 PetscScalar *v; 878 879 PetscFunctionBegin; 880 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 881 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 882 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 883 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 884 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 885 } else B = *matout; 886 887 m = a->A->rmap->n; n = a->A->cmap->n; 888 ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 889 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 890 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 891 for (i=0; i<m; i++) rwork[i] = rstart + i; 892 for (j=0; j<n; j++) { 893 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 894 v += lda; 895 } 896 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 897 ierr = PetscFree(rwork);CHKERRQ(ierr); 898 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 899 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 900 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 901 *matout = B; 902 } else { 903 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 904 } 905 PetscFunctionReturn(0); 906 } 907 908 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 909 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 910 911 PetscErrorCode MatSetUp_MPIDense(Mat A) 912 { 
913 PetscErrorCode ierr; 914 915 PetscFunctionBegin; 916 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 917 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 918 if (!A->preallocated) { 919 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 920 } 921 PetscFunctionReturn(0); 922 } 923 924 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 925 { 926 PetscErrorCode ierr; 927 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 928 929 PetscFunctionBegin; 930 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 934 PetscErrorCode MatConjugate_MPIDense(Mat mat) 935 { 936 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 937 PetscErrorCode ierr; 938 939 PetscFunctionBegin; 940 ierr = MatConjugate(a->A);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 944 PetscErrorCode MatRealPart_MPIDense(Mat A) 945 { 946 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 947 PetscErrorCode ierr; 948 949 PetscFunctionBegin; 950 ierr = MatRealPart(a->A);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 954 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 955 { 956 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 965 { 966 PetscErrorCode ierr; 967 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 968 969 PetscFunctionBegin; 970 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 971 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 972 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 977 978 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 979 { 980 
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  /* per-column norms of the local (rows-local, all-columns) block */
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* square so that the sum across ranks is the squared 2-norm */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Exposes column col of the local block as an MPI CUDA vector backed by the
   matrix storage; must be matched by MatDenseRestoreColumnVec.  Only one
   column vector may be outstanding at a time (tracked via vecinuse). */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily created with a NULL array; the real storage is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column vector obtained with MatDenseGetColumnVec_MPIDenseCUDA */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: additionally read-locks the vector */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a read-only column vector; pops the read lock before resetting */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: previous column contents need not be valid */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a write-only column vector */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Substitutes user-provided device storage for the local block's array;
   disallowed while a column vector is checked out */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restores the array previously displaced by MatDenseCUDAPlaceArray */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Permanently replaces the local block's device array */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The six thin wrappers below forward device-array access to the sequential block */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);

/* Swaps the composed column-vector methods between the CUDA and host
   implementations when the matrix is (un)bound from the CPU */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* an existing host-side cvec cannot be reused; destroy so it is recreated as CUDA */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* bound to CPU: restore the host implementations */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocates (or adopts d_data as) CUDA storage for the sequential block of
   a MATMPIDENSECUDA matrix; no-op for other types */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local block: all rows local to this rank, all N columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fills the local block with random values */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Dense storage always contains the diagonal, so nothing is ever missing */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Positional function table for MATMPIDENSE; the slot comments give the
   MatOperation indices -- entries must not be reordered */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

/* Creates (if needed) and preallocates the sequential local block.  The block
   is SEQDENSE or SEQDENSECUDA depending on the parallel matrix type; data, if
   non-NULL, becomes the block's storage. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    /* local block holds all N columns of this rank's rows */
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Converts a MATMPIDENSE matrix to MATELEMENTAL by scattering every local row
   across all columns with MatSetValues */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Direct (pointer) access to column col of the local block */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Companion to MatDenseGetColumn_MPIDense */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Stacks per-rank sequential matrices (same global column space) into one
   parallel dense matrix; n is the local column count or PETSC_DECIDE */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr =
MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Converts MATMPIDENSECUDA back to host MATMPIDENSE: rebinds to CPU, restores
   the standard vector type, clears the CUDA-only composed functions, and
   converts the local block to MATSEQDENSE */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-specific composed functions (registered by the forward conversion) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Converts host MATMPIDENSE to MATMPIDENSECUDA: switches the default vector
   type to CUDA, registers the CUDA composed functions, converts the local
   block to MATSEQDENSECUDA, and unbinds from the CPU */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Exposes column col of the local block as an MPI vector backed by the matrix
   storage; must be matched by MatDenseRestoreColumnVec.  Only one column
   vector may be outstanding at a time (tracked via vecinuse). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily created with a NULL array; the real storage is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column vector obtained with MatDenseGetColumnVec_MPIDense */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: additionally read-locks the vector */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a read-only column vector; pops the read lock before resetting */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: previous column contents need not be valid */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a write-only column vector */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Constructor for MATMPIDENSE: installs the ops table, creates the stash, and
   composes the dense-specific methods.  (Definition continues past this view.) */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  /* optional conversions, compiled in only when the external packages are configured */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the cusparse AIJ products reuse the plain MPIAIJ/MPIDense implementations */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: build a plain MPIDense matrix, then convert it in place. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation when the type provides one */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-supplied array lives on the host, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.
mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* a host array replaced the storage, so host data is now the valid copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* a device array was supplied, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement array lives on the device */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the GPU copy was just written, mark it as the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU.
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2147 2148 Level: developer 2149 2150 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2151 @*/ 2152 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2153 { 2154 PetscErrorCode ierr; 2155 2156 PetscFunctionBegin; 2157 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2158 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 /*@C 2163 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2164 2165 Not Collective 2166 2167 Input Parameters: 2168 + A - the matrix 2169 - array - the GPU array in column major order 2170 2171 Notes: 2172 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2173 2174 Level: developer 2175 2176 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2177 @*/ 2178 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2179 { 2180 PetscErrorCode ierr; 2181 2182 PetscFunctionBegin; 2183 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 /*@C 2188 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2189 2190 Not Collective 2191 2192 Input Parameters: 2193 . A - the matrix 2194 2195 Output Parameters 2196 . array - the GPU array in column major order 2197 2198 Notes: 2199 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* read/write access may have modified the device copy, mark it as valid */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.
M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user array is supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user array is supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicates an MPIDense matrix: copies the layout, duplicates the inner sequential
   block, and rebuilds the scatter used by MatMult. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr =
MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the layouts rather than copying them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads an MPIDense matrix from a binary or (when configured) HDF5 viewer. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Compares two MPIDense matrices: local blocks are compared, then the verdicts are
   combined with a logical-AND reduction over the communicator. */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat
B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* equal only when every rank's local block is equal */
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for the product data attached to C = A^T * B (see the symbolic phase below). */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for the product data attached to C = A * B^T (see the symbolic phase below). */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T * B: each rank forms the product of its local blocks, then the
   partial results are summed and scattered to the owners of C's rows. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each destination rank receives its row slice times the full column count */
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf, grouped by destination rank */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T * B: sets C's type and sizes and allocates the
   communication buffers reused by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must have conforming row distributions for the local products to line up */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr =
MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full (global rows x global cols) partial product on every rank */
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A * B^T: chooses between the "allgatherv" and "cyclic"
   communication algorithms and allocates the buffers each one needs. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", alg 1 = "cyclic" */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ?
0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers sized for the largest per-rank row slice of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] packs the local rows of B, buf[1] receives all of B */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A * B^T with the cyclic algorithm: B's local rows travel around the
   ring of ranks while each rank multiplies its local A block against the slice it holds. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz,
recvsiz, sendto, recvfrom, recvisfrom; 2592 PetscScalar *sendbuf, *recvbuf=0, *cv; 2593 PetscInt i,cK=A->cmap->N,k,j,bn; 2594 PetscScalar _DOne=1.0,_DZero=0.0; 2595 const PetscScalar *av,*bv; 2596 PetscBLASInt cm, cn, ck, alda, blda, clda; 2597 MPI_Request reqs[2]; 2598 const PetscInt *ranges; 2599 2600 PetscFunctionBegin; 2601 MatCheckProduct(C,3); 2602 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2603 abt = (Mat_MatTransMultDense*)C->product->data; 2604 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2605 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2606 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2607 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2608 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2609 ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2610 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2611 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2612 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2613 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2614 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2615 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2616 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2617 bn = B->rmap->n; 2618 if (blda == bn) { 2619 sendbuf = (PetscScalar*)bv; 2620 } else { 2621 sendbuf = abt->buf[0]; 2622 for (k = 0, i = 0; i < cK; i++) { 2623 for (j = 0; j < bn; j++, k++) { 2624 sendbuf[k] = bv[i * blda + j]; 2625 } 2626 } 2627 } 2628 if (size > 1) { 2629 sendto = (rank + size - 1) % size; 2630 recvfrom = (rank + size + 1) % size; 2631 } else { 2632 sendto = recvfrom = 0; 2633 } 2634 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2635 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2636 recvisfrom = rank; 2637 for (i = 0; i < size; i++) { 2638 /* we have finished receiving in sending, bufs can be read/modified */ 2639 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2640 
PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2641 2642 if (nextrecvisfrom != rank) { 2643 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2644 sendsiz = cK * bn; 2645 recvsiz = cK * nextbn; 2646 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2647 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2648 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2649 } 2650 2651 /* local aseq * sendbuf^T */ 2652 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2653 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2654 2655 if (nextrecvisfrom != rank) { 2656 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2657 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2658 } 2659 bn = nextbn; 2660 recvisfrom = nextrecvisfrom; 2661 sendbuf = recvbuf; 2662 } 2663 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2664 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2665 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2666 PetscFunctionReturn(0); 2667 } 2668 2669 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2670 { 2671 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2672 Mat_MatTransMultDense *abt; 2673 PetscErrorCode ierr; 2674 MPI_Comm comm; 2675 PetscMPIInt size; 2676 PetscScalar *cv, *sendbuf, *recvbuf; 2677 const PetscScalar *av,*bv; 2678 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2679 PetscScalar _DOne=1.0,_DZero=0.0; 2680 PetscBLASInt cm, cn, ck, alda, clda; 2681 2682 PetscFunctionBegin; 2683 MatCheckProduct(C,3); 2684 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2685 abt = 
(Mat_MatTransMultDense*)C->product->data; 2686 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2687 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2688 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2689 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2690 ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2691 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2692 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2693 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2694 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2695 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2696 /* copy transpose of B into buf[0] */ 2697 bn = B->rmap->n; 2698 sendbuf = abt->buf[0]; 2699 recvbuf = abt->buf[1]; 2700 for (k = 0, j = 0; j < bn; j++) { 2701 for (i = 0; i < cK; i++, k++) { 2702 sendbuf[k] = bv[i * blda + j]; 2703 } 2704 } 2705 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2706 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2707 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2708 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2710 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2711 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2712 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2713 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2714 PetscFunctionReturn(0); 2715 } 2716 2717 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2718 { 2719 Mat_MatTransMultDense *abt; 2720 PetscErrorCode ierr; 2721 2722 PetscFunctionBegin; 2723 MatCheckProduct(C,3); 2724 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2725 abt = (Mat_MatTransMultDense*)C->product->data; 2726 switch (abt->alg) { 2727 case 1: 2728 ierr = 
MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2729 break; 2730 default: 2731 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2732 break; 2733 } 2734 PetscFunctionReturn(0); 2735 } 2736 2737 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2738 { 2739 PetscErrorCode ierr; 2740 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2741 2742 PetscFunctionBegin; 2743 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2744 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2745 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2746 ierr = PetscFree(ab);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 #if defined(PETSC_HAVE_ELEMENTAL) 2751 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2752 { 2753 PetscErrorCode ierr; 2754 Mat_MatMultDense *ab; 2755 2756 PetscFunctionBegin; 2757 MatCheckProduct(C,3); 2758 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2759 ab = (Mat_MatMultDense*)C->product->data; 2760 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2761 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2762 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2763 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2768 { 2769 PetscErrorCode ierr; 2770 Mat Ae,Be,Ce; 2771 Mat_MatMultDense *ab; 2772 2773 PetscFunctionBegin; 2774 MatCheckProduct(C,4); 2775 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2776 /* check local size of A and B */ 2777 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2778 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions 
are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2779 } 2780 2781 /* create elemental matrices Ae and Be */ 2782 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2783 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2784 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2785 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2786 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2787 2788 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2789 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2790 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2791 ierr = MatSetUp(Be);CHKERRQ(ierr); 2792 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2793 2794 /* compute symbolic Ce = Ae*Be */ 2795 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2796 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2797 2798 /* setup C */ 2799 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2800 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2801 ierr = MatSetUp(C);CHKERRQ(ierr); 2802 2803 /* create data structure for reuse Cdense */ 2804 ierr = PetscNew(&ab);CHKERRQ(ierr); 2805 ab->Ae = Ae; 2806 ab->Be = Be; 2807 ab->Ce = Ce; 2808 2809 C->product->data = ab; 2810 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2811 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2812 PetscFunctionReturn(0); 2813 } 2814 #endif 2815 /* ----------------------------------------------- */ 2816 #if defined(PETSC_HAVE_ELEMENTAL) 2817 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2818 { 2819 PetscFunctionBegin; 2820 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2821 C->ops->productsymbolic = MatProductSymbolic_AB; 2822 PetscFunctionReturn(0); 2823 } 2824 #endif 2825 2826 static PetscErrorCode 
MatProductSetFromOptions_MPIDense_AtB(Mat C) 2827 { 2828 Mat_Product *product = C->product; 2829 Mat A = product->A,B=product->B; 2830 2831 PetscFunctionBegin; 2832 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2833 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2834 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2835 C->ops->productsymbolic = MatProductSymbolic_AtB; 2836 PetscFunctionReturn(0); 2837 } 2838 2839 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2840 { 2841 PetscErrorCode ierr; 2842 Mat_Product *product = C->product; 2843 const char *algTypes[2] = {"allgatherv","cyclic"}; 2844 PetscInt alg,nalg = 2; 2845 PetscBool flg = PETSC_FALSE; 2846 2847 PetscFunctionBegin; 2848 /* Set default algorithm */ 2849 alg = 0; /* default is allgatherv */ 2850 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2851 if (flg) { 2852 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2853 } 2854 2855 /* Get runtime option */ 2856 if (product->api_user) { 2857 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2858 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2859 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2860 } else { 2861 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2862 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2863 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2864 } 2865 if (flg) { 2866 ierr = 
MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2867 } 2868 2869 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2870 C->ops->productsymbolic = MatProductSymbolic_ABt; 2871 PetscFunctionReturn(0); 2872 } 2873 2874 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2875 { 2876 PetscErrorCode ierr; 2877 Mat_Product *product = C->product; 2878 2879 PetscFunctionBegin; 2880 switch (product->type) { 2881 #if defined(PETSC_HAVE_ELEMENTAL) 2882 case MATPRODUCT_AB: 2883 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2884 break; 2885 #endif 2886 case MATPRODUCT_AtB: 2887 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 2888 break; 2889 case MATPRODUCT_ABt: 2890 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 2891 break; 2892 default: 2893 break; 2894 } 2895 PetscFunctionReturn(0); 2896 } 2897