1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else *B = A; 36 PetscFunctionReturn(0); 37 } 38 39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40 { 41 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42 PetscErrorCode ierr; 43 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44 45 PetscFunctionBegin; 46 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47 lrow = row - rstart; 48 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscErrorCode ierr; 56 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57 58 PetscFunctionBegin; 59 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60 lrow = row - rstart; 61 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 
/* Return (and cache on A as "DiagonalBlock") the square on-diagonal block of the
   parallel dense matrix; the block aliases the storage of the local matrix mdn->A. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */
    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* NOTE(review): the offset m*rstart locates column rstart only when the local
       leading dimension equals m — confirm lda == m is guaranteed here */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* cache the block on A; the composed reference keeps B alive after the local destroy */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Insert/add values: locally owned rows go straight into the sequential matrix,
   off-process rows are queued in the stash until assembly. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: entry (i,j) is addressed as v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: mark unassembled and stash for communication */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Retrieve values; only locally owned rows are supported (columns are all local). */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Leading dimension of the local dense storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read/write access to the local dense array */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-only access to the local dense array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Write-only access to the local dense array */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Temporarily substitute the local storage with a user-provided array */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* a column vector handed out by MatDenseGetColumnVec aliases the array; swapping it now would corrupt that view */
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Undo MatDensePlaceArray, restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract a submatrix; supports only the original row partitioning (no redistribution). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the complete (global) set of requested columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes
*/ 263 newmat = *B; 264 } else { 265 /* Create and fill new matrix */ 266 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 267 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 268 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 269 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 270 } 271 272 /* Now extract the data pointers and do the copy, column at a time */ 273 newmatd = (Mat_MPIDense*)newmat->data; 274 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 275 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 276 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 277 for (i=0; i<Ncols; i++) { 278 const PetscScalar *av = v + lda*icol[i]; 279 for (j=0; j<nrows; j++) { 280 *bv++ = av[irow[j] - rstart]; 281 } 282 } 283 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 284 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 285 286 /* Assemble the matrices so that the correct flags are set */ 287 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 289 290 /* Free work space */ 291 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 292 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 293 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 294 *B = newmat; 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 299 { 300 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 301 PetscErrorCode ierr; 302 303 PetscFunctionBegin; 304 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 309 { 310 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 319 { 320 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 321 PetscErrorCode ierr; 322 323 PetscFunctionBegin; 324 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 329 { 330 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 331 PetscErrorCode ierr; 332 PetscInt nstash,reallocs; 333 334 PetscFunctionBegin; 335 if (mdn->donotstash || mat->nooffprocentries) return(0); 336 337 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 338 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 339 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 344 { 345 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 346 PetscErrorCode ierr; 347 PetscInt i,*row,*col,flg,j,rstart,ncols; 348 PetscMPIInt n; 349 PetscScalar *val; 350 351 PetscFunctionBegin; 352 if (!mdn->donotstash && !mat->nooffprocentries) { 353 /* wait on receives */ 354 while (1) { 355 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 356 if (!flg) break; 357 358 for (i=0; i<n;) { 359 /* Now identify the consecutive vals belonging to the same row */ 360 for (j=i,rstart=row[j]; j<n; j++) { 361 if (row[j] != rstart) break; 362 } 363 if (j < n) ncols = j-i; 364 else ncols = n-i; 365 /* Now assemble all these values with a single function call */ 366 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 367 i = j; 368 } 369 } 370 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 371 } 372 373 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 374 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 375 376 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zero all entries of the (local) dense storage */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Zero the listed (global) rows, optionally placing diag on the diagonal and fixing b = diag*x */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero locally with no diagonal, then set the diagonal collectively if requested */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

/* yy = A*xx: broadcast xx into the local work vector via the SF, then local multiply */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx: same communication pattern as MatMult_MPIDense */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A^T*xx: local transpose multiply into lvec, then sum-reduce into yy via the SF */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode
MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 488 PetscErrorCode ierr; 489 const PetscScalar *ax; 490 PetscScalar *ay; 491 492 PetscFunctionBegin; 493 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 494 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 495 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 496 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 497 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 498 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 499 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 500 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 505 { 506 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 507 PetscErrorCode ierr; 508 PetscInt lda,len,i,n,m = A->rmap->n,radd; 509 PetscScalar *x,zero = 0.0; 510 const PetscScalar *av; 511 512 PetscFunctionBegin; 513 ierr = VecSet(v,zero);CHKERRQ(ierr); 514 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 515 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 516 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 517 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 518 radd = A->rmap->rstart*m; 519 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 520 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 521 for (i=0; i<len; i++) { 522 x[i] = av[radd + i*lda + i]; 523 } 524 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 525 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 PetscErrorCode MatDestroy_MPIDense(Mat mat) 530 { 531 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 532 PetscErrorCode ierr; 533 534 PetscFunctionBegin; 535 #if defined(PETSC_USE_LOG) 536 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 537 #endif 538 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 539 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 540 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 541 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 542 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 543 544 ierr = PetscFree(mat->data);CHKERRQ(ierr); 545 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 546 547 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 548 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 549 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 550 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 551 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 552 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 553 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 554 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 555 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 556 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 557 #if defined(PETSC_HAVE_ELEMENTAL) 558 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 559 #endif 560 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 561 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 563 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 581 582 #include <petscdraw.h> 583 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 584 { 585 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 586 PetscErrorCode ierr; 587 PetscMPIInt rank = mdn->rank; 588 PetscViewerType vtype; 589 PetscBool iascii,isdraw; 590 PetscViewer sviewer; 591 PetscViewerFormat format; 592 593 
  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* info formats print per-process statistics only; no matrix entries */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 now owns the whole matrix; view its sequential part */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatch viewing to the appropriate helper based on the viewer type */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Gather local MatInfo statistics and reduce them (max or sum) over the communicator as requested */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Forward supported options to the local matrix; log and ignore the inapplicable ones */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr =
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 739 break; 740 case MAT_ROW_ORIENTED: 741 MatCheckPreallocated(A,1); 742 a->roworiented = flg; 743 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 744 break; 745 case MAT_NEW_DIAGONALS: 746 case MAT_KEEP_NONZERO_PATTERN: 747 case MAT_USE_HASH_TABLE: 748 case MAT_SORTED_FULL: 749 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 750 break; 751 case MAT_IGNORE_OFF_PROC_ENTRIES: 752 a->donotstash = flg; 753 break; 754 case MAT_SYMMETRIC: 755 case MAT_STRUCTURALLY_SYMMETRIC: 756 case MAT_HERMITIAN: 757 case MAT_SYMMETRY_ETERNAL: 758 case MAT_IGNORE_LOWER_TRIANGULAR: 759 case MAT_IGNORE_ZERO_ENTRIES: 760 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 761 break; 762 default: 763 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 769 { 770 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 771 const PetscScalar *l; 772 PetscScalar x,*v,*vv,*r; 773 PetscErrorCode ierr; 774 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 775 776 PetscFunctionBegin; 777 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 778 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 779 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 780 if (ll) { 781 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 782 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 783 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 784 for (i=0; i<m; i++) { 785 x = l[i]; 786 v = vv + i; 787 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 788 } 789 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 790 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 791 } 792 if (rr) { 793 const PetscScalar *ar; 794 795 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 796 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec 
non-conforming local size, %d != %d.", s3a, s3); 797 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 798 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 799 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 800 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 801 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 802 for (i=0; i<n; i++) { 803 x = r[i]; 804 v = vv + i*lda; 805 for (j=0; j<m; j++) (*v++) *= x; 806 } 807 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 808 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 809 } 810 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 811 PetscFunctionReturn(0); 812 } 813 814 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 815 { 816 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 817 PetscErrorCode ierr; 818 PetscInt i,j; 819 PetscReal sum = 0.0; 820 const PetscScalar *av,*v; 821 822 PetscFunctionBegin; 823 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 824 v = av; 825 if (mdn->size == 1) { 826 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 827 } else { 828 if (type == NORM_FROBENIUS) { 829 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 830 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 831 } 832 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 833 *nrm = PetscSqrtReal(*nrm); 834 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 835 } else if (type == NORM_1) { 836 PetscReal *tmp,*tmp2; 837 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 838 *nrm = 0.0; 839 v = av; 840 for (j=0; j<mdn->A->cmap->n; j++) { 841 for (i=0; i<mdn->A->rmap->n; i++) { 842 tmp[j] += PetscAbsScalar(*v); v++; 843 } 844 } 845 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 846 for (j=0; j<A->cmap->N; j++) { 847 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 848 } 849 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 850 ierr = 
/*
  MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

  For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a fresh matrix B with the
  transposed layout (A's column layout becomes B's row layout) is created;
  for MAT_REUSE_MATRIX the caller-supplied *matout is filled in.
  Entries are inserted with global indices, so off-process values are stashed
  and communicated during assembly.  For MAT_INPLACE_MATRIX the assembled B
  replaces A's contents via MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B is N x M with A's local column/row counts swapped */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  /* local block holds m owned rows and n (= all global) columns */
  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* rwork = global row indices of the locally owned rows of A */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of A becomes row j of B; v points at the start of column j */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: fold the assembled transpose back into A */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
899 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 900 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 901 902 PetscErrorCode MatSetUp_MPIDense(Mat A) 903 { 904 PetscErrorCode ierr; 905 906 PetscFunctionBegin; 907 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 908 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 909 if (!A->preallocated) { 910 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } 914 915 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 916 { 917 PetscErrorCode ierr; 918 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 919 920 PetscFunctionBegin; 921 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 925 PetscErrorCode MatConjugate_MPIDense(Mat mat) 926 { 927 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 928 PetscErrorCode ierr; 929 930 PetscFunctionBegin; 931 ierr = MatConjugate(a->A);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 935 PetscErrorCode MatRealPart_MPIDense(Mat A) 936 { 937 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 938 PetscErrorCode ierr; 939 940 PetscFunctionBegin; 941 ierr = MatRealPart(a->A);CHKERRQ(ierr); 942 PetscFunctionReturn(0); 943 } 944 945 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 946 { 947 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 948 PetscErrorCode ierr; 949 950 PetscFunctionBegin; 951 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954 955 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 956 { 957 PetscErrorCode ierr; 958 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 959 960 PetscFunctionBegin; 961 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 962 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 963 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 964 
PetscFunctionReturn(0); 965 } 966 967 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 968 969 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 970 { 971 PetscErrorCode ierr; 972 PetscInt i,n; 973 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 974 PetscReal *work; 975 976 PetscFunctionBegin; 977 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 978 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 979 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 980 if (type == NORM_2) { 981 for (i=0; i<n; i++) work[i] *= work[i]; 982 } 983 if (type == NORM_INFINITY) { 984 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 985 } else { 986 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 987 } 988 ierr = PetscFree(work);CHKERRQ(ierr); 989 if (type == NORM_2) { 990 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 991 } 992 PetscFunctionReturn(0); 993 } 994 995 #if defined(PETSC_HAVE_CUDA) 996 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 997 { 998 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 999 PetscErrorCode ierr; 1000 PetscInt lda; 1001 1002 PetscFunctionBegin; 1003 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1004 if (!a->cvec) { 1005 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1006 } 1007 a->vecinuse = col + 1; 1008 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1009 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1010 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1011 *v = a->cvec; 1012 PetscFunctionReturn(0); 1013 } 1014 1015 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1016 { 1017 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1018 PetscErrorCode ierr; 
1019 1020 PetscFunctionBegin; 1021 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1022 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1023 a->vecinuse = 0; 1024 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1025 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1026 *v = NULL; 1027 PetscFunctionReturn(0); 1028 } 1029 1030 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1031 { 1032 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1033 PetscInt lda; 1034 PetscErrorCode ierr; 1035 1036 PetscFunctionBegin; 1037 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1038 if (!a->cvec) { 1039 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1040 } 1041 a->vecinuse = col + 1; 1042 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1043 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1044 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1045 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1046 *v = a->cvec; 1047 PetscFunctionReturn(0); 1048 } 1049 1050 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1051 { 1052 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1053 PetscErrorCode ierr; 1054 1055 PetscFunctionBegin; 1056 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1057 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1058 a->vecinuse = 0; 1059 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1060 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1061 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1062 *v = NULL; 1063 PetscFunctionReturn(0); 1064 } 1065 1066 static PetscErrorCode 
MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1067 { 1068 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1069 PetscErrorCode ierr; 1070 PetscInt lda; 1071 1072 PetscFunctionBegin; 1073 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1074 if (!a->cvec) { 1075 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1076 } 1077 a->vecinuse = col + 1; 1078 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1079 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1080 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1081 *v = a->cvec; 1082 PetscFunctionReturn(0); 1083 } 1084 1085 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1086 { 1087 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1088 PetscErrorCode ierr; 1089 1090 PetscFunctionBegin; 1091 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1092 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1093 a->vecinuse = 0; 1094 ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1095 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1096 *v = NULL; 1097 PetscFunctionReturn(0); 1098 } 1099 1100 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1101 { 1102 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1103 PetscErrorCode ierr; 1104 1105 PetscFunctionBegin; 1106 if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1107 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1112 { 1113 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1114 PetscErrorCode ierr; 1115 1116 
PetscFunctionBegin; 1117 if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1118 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1123 { 1124 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1129 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1134 { 1135 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1144 { 1145 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1146 PetscErrorCode ierr; 1147 1148 PetscFunctionBegin; 1149 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1154 { 1155 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1164 { 1165 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1170 PetscFunctionReturn(0); 1171 } 1172 1173 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1174 { 1175 Mat_MPIDense *l = 
/*
  MatBindToCPU_MPIDenseCUDA - Pins the matrix to host (bind = PETSC_TRUE) or
  allows GPU execution (bind = PETSC_FALSE).

  Binds the local block accordingly and swaps the composed column-vector
  accessors between the host (_MPIDense) and CUDA (_MPIDenseCUDA)
  implementations.  Refused while a column vector is checked out.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* a cached column vector that is not VECMPICUDA cannot serve the CUDA
       accessors; drop it so it is recreated lazily with the right type.
       NOTE(review): the bind-to-CPU branch below keeps a CUDA cvec, if any —
       presumably recreated elsewhere; verify. */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    /* install the CUDA column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1251 } 1252 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1253 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1254 A->preallocated = PETSC_TRUE; 1255 PetscFunctionReturn(0); 1256 } 1257 #endif 1258 1259 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1260 { 1261 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1262 PetscErrorCode ierr; 1263 1264 PetscFunctionBegin; 1265 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1270 1271 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1272 { 1273 PetscFunctionBegin; 1274 *missing = PETSC_FALSE; 1275 PetscFunctionReturn(0); 1276 } 1277 1278 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1279 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1280 1281 /* -------------------------------------------------------------------*/ 1282 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1283 MatGetRow_MPIDense, 1284 MatRestoreRow_MPIDense, 1285 MatMult_MPIDense, 1286 /* 4*/ MatMultAdd_MPIDense, 1287 MatMultTranspose_MPIDense, 1288 MatMultTransposeAdd_MPIDense, 1289 0, 1290 0, 1291 0, 1292 /* 10*/ 0, 1293 0, 1294 0, 1295 0, 1296 MatTranspose_MPIDense, 1297 /* 15*/ MatGetInfo_MPIDense, 1298 MatEqual_MPIDense, 1299 MatGetDiagonal_MPIDense, 1300 MatDiagonalScale_MPIDense, 1301 MatNorm_MPIDense, 1302 /* 20*/ MatAssemblyBegin_MPIDense, 1303 MatAssemblyEnd_MPIDense, 1304 MatSetOption_MPIDense, 1305 MatZeroEntries_MPIDense, 1306 /* 24*/ MatZeroRows_MPIDense, 1307 0, 1308 0, 1309 0, 1310 0, 1311 /* 29*/ MatSetUp_MPIDense, 1312 0, 1313 0, 1314 MatGetDiagonalBlock_MPIDense, 1315 0, 1316 /* 34*/ MatDuplicate_MPIDense, 1317 0, 1318 0, 1319 0, 1320 0, 1321 /* 39*/ MatAXPY_MPIDense, 1322 
MatCreateSubMatrices_MPIDense, 1323 0, 1324 MatGetValues_MPIDense, 1325 0, 1326 /* 44*/ 0, 1327 MatScale_MPIDense, 1328 MatShift_Basic, 1329 0, 1330 0, 1331 /* 49*/ MatSetRandom_MPIDense, 1332 0, 1333 0, 1334 0, 1335 0, 1336 /* 54*/ 0, 1337 0, 1338 0, 1339 0, 1340 0, 1341 /* 59*/ MatCreateSubMatrix_MPIDense, 1342 MatDestroy_MPIDense, 1343 MatView_MPIDense, 1344 0, 1345 0, 1346 /* 64*/ 0, 1347 0, 1348 0, 1349 0, 1350 0, 1351 /* 69*/ 0, 1352 0, 1353 0, 1354 0, 1355 0, 1356 /* 74*/ 0, 1357 0, 1358 0, 1359 0, 1360 0, 1361 /* 79*/ 0, 1362 0, 1363 0, 1364 0, 1365 /* 83*/ MatLoad_MPIDense, 1366 0, 1367 0, 1368 0, 1369 0, 1370 0, 1371 /* 89*/ 0, 1372 0, 1373 MatMatMultNumeric_MPIDense, 1374 0, 1375 0, 1376 /* 94*/ 0, 1377 0, 1378 0, 1379 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1380 0, 1381 /* 99*/ MatProductSetFromOptions_MPIDense, 1382 0, 1383 0, 1384 MatConjugate_MPIDense, 1385 0, 1386 /*104*/ 0, 1387 MatRealPart_MPIDense, 1388 MatImaginaryPart_MPIDense, 1389 0, 1390 0, 1391 /*109*/ 0, 1392 0, 1393 0, 1394 MatGetColumnVector_MPIDense, 1395 MatMissingDiagonal_MPIDense, 1396 /*114*/ 0, 1397 0, 1398 0, 1399 0, 1400 0, 1401 /*119*/ 0, 1402 0, 1403 0, 1404 0, 1405 0, 1406 /*124*/ 0, 1407 MatGetColumnNorms_MPIDense, 1408 0, 1409 0, 1410 0, 1411 /*129*/ 0, 1412 0, 1413 0, 1414 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1415 0, 1416 /*134*/ 0, 1417 0, 1418 0, 1419 0, 1420 0, 1421 /*139*/ 0, 1422 0, 1423 0, 1424 0, 1425 0, 1426 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1427 /*145*/ 0, 1428 0, 1429 0 1430 }; 1431 1432 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1433 { 1434 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1435 PetscBool iscuda; 1436 PetscErrorCode ierr; 1437 1438 PetscFunctionBegin; 1439 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1440 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1441 if (!a->A) { 1442 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1443 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1444 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1445 } 1446 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1447 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1448 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1449 mat->preallocated = PETSC_TRUE; 1450 PetscFunctionReturn(0); 1451 } 1452 1453 #if defined(PETSC_HAVE_ELEMENTAL) 1454 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1455 { 1456 Mat mat_elemental; 1457 PetscErrorCode ierr; 1458 PetscScalar *v; 1459 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1460 1461 PetscFunctionBegin; 1462 if (reuse == MAT_REUSE_MATRIX) { 1463 mat_elemental = *newmat; 1464 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1465 } else { 1466 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1467 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1468 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1469 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1470 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1471 } 1472 1473 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1474 for (i=0; i<N; i++) cols[i] = i; 1475 for (i=0; i<m; i++) rows[i] = rstart + i; 1476 1477 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1478 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1479 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1480 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1481 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1482 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1483 ierr = 
PetscFree2(rows,cols);CHKERRQ(ierr); 1484 1485 if (reuse == MAT_INPLACE_MATRIX) { 1486 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1487 } else { 1488 *newmat = mat_elemental; 1489 } 1490 PetscFunctionReturn(0); 1491 } 1492 #endif 1493 1494 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1495 { 1496 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1497 PetscErrorCode ierr; 1498 1499 PetscFunctionBegin; 1500 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 1504 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1505 { 1506 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1511 PetscFunctionReturn(0); 1512 } 1513 1514 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1515 { 1516 PetscErrorCode ierr; 1517 Mat_MPIDense *mat; 1518 PetscInt m,nloc,N; 1519 1520 PetscFunctionBegin; 1521 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1522 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1523 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1524 PetscInt sum; 1525 1526 if (n == PETSC_DECIDE) { 1527 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1528 } 1529 /* Check sum(n) = N */ 1530 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1531 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1532 1533 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1534 } 1535 1536 /* numeric phase */ 1537 mat = (Mat_MPIDense*)(*outmat)->data; 1538 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1539 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1540 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1541 
/*
  MatConvert_MPIDenseCUDA_MPIDense - Converts a MATMPIDENSECUDA matrix to
  plain MATMPIDENSE.

  Duplicates or copies into *newmat as requested, rebinds to the CPU, resets
  the default vector type, renames the type, removes the CUDA-only composed
  functions, and converts the local block to MATSEQDENSE.  For
  MAT_INPLACE_MATRIX, *newmat already aliases M so the object is converted
  in place.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  /* restore the host column-vector accessors before changing the type */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* composing NULL removes the CUDA-specific methods from the object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local block and rebuild the multiply machinery */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1614 m = (Mat_MPIDense*)(B)->data; 1615 if (m->A) { 1616 ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1617 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1618 B->offloadmask = PETSC_OFFLOAD_BOTH; 1619 } else { 1620 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1621 } 1622 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1623 1624 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1625 PetscFunctionReturn(0); 1626 } 1627 #endif 1628 1629 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1630 { 1631 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1632 PetscErrorCode ierr; 1633 PetscInt lda; 1634 1635 PetscFunctionBegin; 1636 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1637 if (!a->cvec) { 1638 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1639 } 1640 a->vecinuse = col + 1; 1641 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1642 ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1643 ierr = VecPlaceArray(a->cvec,a->ptrinuse + 
(size_t)col * (size_t)lda);CHKERRQ(ierr); 1644 *v = a->cvec; 1645 PetscFunctionReturn(0); 1646 } 1647 1648 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1649 { 1650 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1651 PetscErrorCode ierr; 1652 1653 PetscFunctionBegin; 1654 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1655 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1656 a->vecinuse = 0; 1657 ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1658 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1659 *v = NULL; 1660 PetscFunctionReturn(0); 1661 } 1662 1663 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1664 { 1665 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1666 PetscErrorCode ierr; 1667 PetscInt lda; 1668 1669 PetscFunctionBegin; 1670 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1671 if (!a->cvec) { 1672 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1673 } 1674 a->vecinuse = col + 1; 1675 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1676 ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1677 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1678 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1679 *v = a->cvec; 1680 PetscFunctionReturn(0); 1681 } 1682 1683 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1684 { 1685 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1686 PetscErrorCode ierr; 1687 1688 PetscFunctionBegin; 1689 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1690 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1691 a->vecinuse = 0; 1692 ierr = 
MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1693 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1694 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1695 *v = NULL; 1696 PetscFunctionReturn(0); 1697 } 1698 1699 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1700 { 1701 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1702 PetscErrorCode ierr; 1703 PetscInt lda; 1704 1705 PetscFunctionBegin; 1706 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1707 if (!a->cvec) { 1708 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1709 } 1710 a->vecinuse = col + 1; 1711 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1712 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1713 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1714 *v = a->cvec; 1715 PetscFunctionReturn(0); 1716 } 1717 1718 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1719 { 1720 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1725 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1726 a->vecinuse = 0; 1727 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1728 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1729 *v = NULL; 1730 PetscFunctionReturn(0); 1731 } 1732 1733 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1734 { 1735 Mat_MPIDense *a; 1736 PetscErrorCode ierr; 1737 1738 PetscFunctionBegin; 1739 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1740 mat->data = (void*)a; 1741 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1742 1743 mat->insertmode = NOT_SET_VALUES; 1744 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1745 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1746 1747 /* build cache for off array entries formed */ 1748 a->donotstash = PETSC_FALSE; 1749 1750 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1751 1752 /* stuff used for matrix vector multiply */ 1753 a->lvec = 0; 1754 a->Mvctx = 0; 1755 a->roworiented = PETSC_TRUE; 1756 1757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1761 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1762 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 1763 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1764 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1765 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1766 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 1767 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1768 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1769 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1770 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1771 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1772 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1773 #if defined(PETSC_HAVE_ELEMENTAL) 1774 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1775 #endif 1776 #if defined(PETSC_HAVE_CUDA) 1777 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1778 #endif 1779 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1780 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1782 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1783 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1784 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr); 1785 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr); 1786 1787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1791 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 /*MC 1796 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1797 1798 Options Database Keys: 1799 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1800 1801 Level: beginner 1802 1803 .seealso: 1804 1805 M*/ 1806 #if defined(PETSC_HAVE_CUDA) 1807 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1808 { 1809 PetscErrorCode ierr; 1810 1811 PetscFunctionBegin; 1812 ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1813 ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 #endif 1817 1818 /*MC 1819 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1820 1821 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1822 and MATMPIDENSE otherwise. 1823 1824 Options Database Keys: 1825 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatches to MatMPIDenseSetPreallocation_MPIDense when B is MPIDENSE;
     silently does nothing for other types (PetscTryMethod) */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values changed: bump the object state so cached results are invalidated */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-provided array lives on the host, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to what it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* replacement array is host memory: mark CPU data as current */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.
This is useful to avoid copying an array 1977 into a matrix 1978 1979 Not Collective 1980 1981 Input Parameters: 1982 + mat - the matrix 1983 - array - the array in column major order 1984 1985 Notes: 1986 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 1987 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 1988 1989 Level: developer 1990 1991 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 1992 @*/ 1993 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 1994 { 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 1999 ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2000 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2001 mat->offloadmask = PETSC_OFFLOAD_GPU; 2002 PetscFunctionReturn(0); 2003 } 2004 2005 /*@C 2006 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2007 2008 Not Collective 2009 2010 Input Parameters: 2011 . mat - the matrix 2012 2013 Notes: 2014 You can only call this after a call to MatDenseCUDAPlaceArray() 2015 2016 Level: developer 2017 2018 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2019 2020 @*/ 2021 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2022 { 2023 PetscErrorCode ierr; 2024 2025 PetscFunctionBegin; 2026 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2027 ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2028 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 /*@C 2033 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2034 array provided by the user. 
This is useful to avoid copying an array 2035 into a matrix 2036 2037 Not Collective 2038 2039 Input Parameters: 2040 + mat - the matrix 2041 - array - the array in column major order 2042 2043 Notes: 2044 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2045 The memory passed in CANNOT be freed by the user. It will be freed 2046 when the matrix is destroyed. The array should respect the matrix leading dimension. 2047 2048 Level: developer 2049 2050 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2051 @*/ 2052 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2053 { 2054 PetscErrorCode ierr; 2055 2056 PetscFunctionBegin; 2057 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2058 ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2059 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2060 mat->offloadmask = PETSC_OFFLOAD_GPU; 2061 PetscFunctionReturn(0); 2062 } 2063 2064 /*@C 2065 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2066 2067 Not Collective 2068 2069 Input Parameters: 2070 . A - the matrix 2071 2072 Output Parameters 2073 . array - the GPU array in column major order 2074 2075 Notes: 2076 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 
2077 2078 Level: developer 2079 2080 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2081 @*/ 2082 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2083 { 2084 PetscErrorCode ierr; 2085 2086 PetscFunctionBegin; 2087 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2088 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2089 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /*@C 2094 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2095 2096 Not Collective 2097 2098 Input Parameters: 2099 + A - the matrix 2100 - array - the GPU array in column major order 2101 2102 Notes: 2103 2104 Level: developer 2105 2106 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2107 @*/ 2108 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2109 { 2110 PetscErrorCode ierr; 2111 2112 PetscFunctionBegin; 2113 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2114 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2115 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2116 A->offloadmask = PETSC_OFFLOAD_GPU; 2117 PetscFunctionReturn(0); 2118 } 2119 2120 /*@C 2121 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2122 2123 Not Collective 2124 2125 Input Parameters: 2126 . A - the matrix 2127 2128 Output Parameters 2129 . array - the GPU array in column major order 2130 2131 Notes: 2132 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2133 2134 Level: developer 2135 2136 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2137 @*/ 2138 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2139 { 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2144 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 /*@C 2149 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2150 2151 Not Collective 2152 2153 Input Parameters: 2154 + A - the matrix 2155 - array - the GPU array in column major order 2156 2157 Notes: 2158 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2159 2160 Level: developer 2161 2162 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2163 @*/ 2164 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2165 { 2166 PetscErrorCode ierr; 2167 2168 PetscFunctionBegin; 2169 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2170 PetscFunctionReturn(0); 2171 } 2172 2173 /*@C 2174 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2175 2176 Not Collective 2177 2178 Input Parameters: 2179 . A - the matrix 2180 2181 Output Parameters 2182 . array - the GPU array in column major order 2183 2184 Notes: 2185 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2186 2187 Level: developer 2188 2189 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2190 @*/ 2191 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2192 { 2193 PetscErrorCode ierr; 2194 2195 PetscFunctionBegin; 2196 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2197 ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2198 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2199 PetscFunctionReturn(0); 2200 } 2201 2202 /*@C 2203 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2204 2205 Not Collective 2206 2207 Input Parameters: 2208 + A - the matrix 2209 - array - the GPU array in column major order 2210 2211 Notes: 2212 2213 Level: developer 2214 2215 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2216 @*/ 2217 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2218 { 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2223 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2224 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2225 A->offloadmask = PETSC_OFFLOAD_GPU; 2226 PetscFunctionReturn(0); 2227 } 2228 #endif 2229 2230 /*@C 2231 MatCreateDense - Creates a matrix in dense format. 2232 2233 Collective 2234 2235 Input Parameters: 2236 + comm - MPI communicator 2237 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2238 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2239 . 
M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user array is supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* pick the sequential or parallel variant based on the communicator size */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicates an MPIDENSE matrix: copies layouts and flags, duplicates the
   local sequential block (copying values if cpvalues says so), and rebuilds
   the scatter used for MatMult */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the layout objects instead of rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads an MPIDENSE matrix from a binary or (when available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool
*flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* compare local blocks, then reduce with logical AND so the result is
     identical on all ranks */
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroys the reuse data attached by MatTransposeMatMultSymbolic_MPIDense_MPIDense,
   then chains to the original destroy routine saved in atb->destroy */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = (*atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroys the reuse data attached by MatMatTransposeMultSymbolic_MPIDense_MPIDense,
   then chains to the original destroy routine saved in abt->destroy */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T * B for two MPIDENSE matrices: each rank forms the
   full-sized local product aseq^T * bseq, packs it by destination rank, and an
   MPI_Reduce_scatter sums the contributions into each rank's rows of C */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*sendbuf=atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  /* recvcounts[i] = number of entries destined for rank i (its rows times all columns) */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf: grouped per destination rank, column-major within each group */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T * B: checks that A and B have matching row layouts,
   sizes and assembles C, and attaches the send buffer / recvcounts reused by
   the numeric phase (freed by MatDestroy_MatTransMatMult_MPIDense_MPIDense) */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)C->data;
  c->atbdense  = atb;
  /* hook our destroy in front of the type's destroy so the reuse data is freed */
  atb->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A * B^T: selects the "allgatherv" (alg 0) or "cyclic"
   (alg 1) communication scheme from the product options, sizes C, and attaches
   the buffers reused by the numeric phase */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — double buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — send buffer for the local block, receive buffer for all of B */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c            = (Mat_MPIDense*)C->data;
  c->abtdense  = abt;
  /* hook our destroy in front of the type's destroy so the reuse data is freed */
  abt->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
PetscErrorCode ierr; 2572 MPI_Comm comm; 2573 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2574 PetscScalar *sendbuf, *recvbuf=0, *cv; 2575 PetscInt i,cK=A->cmap->N,k,j,bn; 2576 PetscScalar _DOne=1.0,_DZero=0.0; 2577 const PetscScalar *av,*bv; 2578 PetscBLASInt cm, cn, ck, alda, blda, clda; 2579 MPI_Request reqs[2]; 2580 const PetscInt *ranges; 2581 2582 PetscFunctionBegin; 2583 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2584 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2586 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2587 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2588 ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2589 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2590 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2591 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2592 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2593 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2594 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2595 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2596 bn = B->rmap->n; 2597 if (blda == bn) { 2598 sendbuf = (PetscScalar*)bv; 2599 } else { 2600 sendbuf = abt->buf[0]; 2601 for (k = 0, i = 0; i < cK; i++) { 2602 for (j = 0; j < bn; j++, k++) { 2603 sendbuf[k] = bv[i * blda + j]; 2604 } 2605 } 2606 } 2607 if (size > 1) { 2608 sendto = (rank + size - 1) % size; 2609 recvfrom = (rank + size + 1) % size; 2610 } else { 2611 sendto = recvfrom = 0; 2612 } 2613 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2614 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2615 recvisfrom = rank; 2616 for (i = 0; i < size; i++) { 2617 /* we have finished receiving in sending, bufs can be read/modified */ 2618 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2619 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2620 2621 if (nextrecvisfrom != rank) { 
2622 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2623 sendsiz = cK * bn; 2624 recvsiz = cK * nextbn; 2625 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2626 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2627 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2628 } 2629 2630 /* local aseq * sendbuf^T */ 2631 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2632 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2633 2634 if (nextrecvisfrom != rank) { 2635 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2636 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2637 } 2638 bn = nextbn; 2639 recvisfrom = nextrecvisfrom; 2640 sendbuf = recvbuf; 2641 } 2642 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2643 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2644 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2645 PetscFunctionReturn(0); 2646 } 2647 2648 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2649 { 2650 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2651 Mat_MatTransMultDense *abt = c->abtdense; 2652 PetscErrorCode ierr; 2653 MPI_Comm comm; 2654 PetscMPIInt size; 2655 PetscScalar *cv, *sendbuf, *recvbuf; 2656 const PetscScalar *av,*bv; 2657 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2658 PetscScalar _DOne=1.0,_DZero=0.0; 2659 PetscBLASInt cm, cn, ck, alda, clda; 2660 2661 PetscFunctionBegin; 2662 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2663 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2664 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2665 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2666 
ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2667 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2668 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2669 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2670 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2671 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2672 /* copy transpose of B into buf[0] */ 2673 bn = B->rmap->n; 2674 sendbuf = abt->buf[0]; 2675 recvbuf = abt->buf[1]; 2676 for (k = 0, j = 0; j < bn; j++) { 2677 for (i = 0; i < cK; i++, k++) { 2678 sendbuf[k] = bv[i * blda + j]; 2679 } 2680 } 2681 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2682 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2683 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2684 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2685 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2686 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2687 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2688 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2689 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2694 { 2695 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2696 Mat_MatTransMultDense *abt = c->abtdense; 2697 PetscErrorCode ierr; 2698 2699 PetscFunctionBegin; 2700 switch (abt->alg) { 2701 case 1: 2702 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2703 break; 2704 default: 2705 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2706 break; 2707 } 2708 PetscFunctionReturn(0); 2709 } 2710 2711 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2712 { 2713 PetscErrorCode ierr; 2714 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2715 
Mat_MatMultDense *ab = a->abdense; 2716 2717 PetscFunctionBegin; 2718 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2719 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2720 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2721 2722 ierr = (ab->destroy)(A);CHKERRQ(ierr); 2723 ierr = PetscFree(ab);CHKERRQ(ierr); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 #if defined(PETSC_HAVE_ELEMENTAL) 2728 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2729 { 2730 PetscErrorCode ierr; 2731 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2732 Mat_MatMultDense *ab=c->abdense; 2733 2734 PetscFunctionBegin; 2735 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2736 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2737 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2738 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2743 { 2744 PetscErrorCode ierr; 2745 Mat Ae,Be,Ce; 2746 Mat_MPIDense *c; 2747 Mat_MatMultDense *ab; 2748 2749 PetscFunctionBegin; 2750 /* check local size of A and B */ 2751 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2752 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2753 } 2754 2755 /* create elemental matrices Ae and Be */ 2756 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2757 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2758 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2759 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2760 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2761 2762 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 
2763 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2764 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2765 ierr = MatSetUp(Be);CHKERRQ(ierr); 2766 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2767 2768 /* compute symbolic Ce = Ae*Be */ 2769 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2770 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2771 2772 /* setup C */ 2773 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2774 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2775 ierr = MatSetUp(C);CHKERRQ(ierr); 2776 2777 /* create data structure for reuse Cdense */ 2778 ierr = PetscNew(&ab);CHKERRQ(ierr); 2779 c = (Mat_MPIDense*)C->data; 2780 c->abdense = ab; 2781 2782 ab->Ae = Ae; 2783 ab->Be = Be; 2784 ab->Ce = Ce; 2785 ab->destroy = C->ops->destroy; 2786 C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2787 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2788 C->ops->productnumeric = MatProductNumeric_AB; 2789 PetscFunctionReturn(0); 2790 } 2791 #endif 2792 /* ----------------------------------------------- */ 2793 #if defined(PETSC_HAVE_ELEMENTAL) 2794 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2795 { 2796 PetscFunctionBegin; 2797 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2798 C->ops->productsymbolic = MatProductSymbolic_AB; 2799 C->ops->productnumeric = MatProductNumeric_AB; 2800 PetscFunctionReturn(0); 2801 } 2802 #endif 2803 2804 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C) 2805 { 2806 PetscErrorCode ierr; 2807 Mat_Product *product = C->product; 2808 2809 PetscFunctionBegin; 2810 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr); 2811 C->ops->productnumeric = MatProductNumeric_AtB; 2812 C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense; 2813 
PetscFunctionReturn(0); 2814 } 2815 2816 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2817 { 2818 Mat_Product *product = C->product; 2819 Mat A = product->A,B=product->B; 2820 2821 PetscFunctionBegin; 2822 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2823 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2824 2825 C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense; 2826 PetscFunctionReturn(0); 2827 } 2828 2829 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2830 { 2831 PetscErrorCode ierr; 2832 Mat_Product *product = C->product; 2833 const char *algTypes[2] = {"allgatherv","cyclic"}; 2834 PetscInt alg,nalg = 2; 2835 PetscBool flg = PETSC_FALSE; 2836 2837 PetscFunctionBegin; 2838 /* Set default algorithm */ 2839 alg = 0; /* default is allgatherv */ 2840 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2841 if (flg) { 2842 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2843 } 2844 2845 /* Get runtime option */ 2846 if (product->api_user) { 2847 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2848 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2849 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2850 } else { 2851 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2852 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2853 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2854 } 2855 if (flg) { 2856 ierr = 
MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2857 } 2858 2859 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2860 C->ops->productsymbolic = MatProductSymbolic_ABt; 2861 C->ops->productnumeric = MatProductNumeric_ABt; 2862 PetscFunctionReturn(0); 2863 } 2864 2865 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2866 { 2867 PetscErrorCode ierr; 2868 Mat_Product *product = C->product; 2869 2870 PetscFunctionBegin; 2871 switch (product->type) { 2872 #if defined(PETSC_HAVE_ELEMENTAL) 2873 case MATPRODUCT_AB: 2874 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2875 break; 2876 #endif 2877 case MATPRODUCT_AtB: 2878 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 2879 break; 2880 case MATPRODUCT_ABt: 2881 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 2882 break; 2883 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]); 2884 } 2885 PetscFunctionReturn(0); 2886 } 2887