1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else *B = A; 36 PetscFunctionReturn(0); 37 } 38 39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40 { 41 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42 PetscErrorCode ierr; 43 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44 45 PetscFunctionBegin; 46 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47 lrow = row - rstart; 48 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscErrorCode ierr; 56 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57 58 PetscFunctionBegin; 59 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60 lrow = row - rstart; 61 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 66 { 67 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 68 PetscErrorCode ierr; 69 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 70 PetscScalar *array; 71 MPI_Comm comm; 72 PetscBool cong; 73 Mat B; 74 75 PetscFunctionBegin; 76 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 77 if (!cong) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 78 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 79 if (!B) { 80 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 81 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 82 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 83 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 84 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 85 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 86 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 87 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 88 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 89 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 90 *a = B; 91 ierr = MatDestroy(&B);CHKERRQ(ierr); 92 } else *a = B; 93 PetscFunctionReturn(0); 94 } 95 96 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 97 { 98 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 99 PetscErrorCode ierr; 100 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 101 PetscBool roworiented = A->roworiented; 102 103 PetscFunctionBegin; 104 for (i=0; i<m; i++) { 105 if (idxm[i] < 0) continue; 106 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 107 if (idxm[i] >= rstart && idxm[i] < rend) { 108 row = idxm[i] - rstart; 109 if (roworiented) { 110 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 111 } else { 112 for 
(j=0; j<n; j++) { 113 if (idxn[j] < 0) continue; 114 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 115 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 116 } 117 } 118 } else if (!A->donotstash) { 119 mat->assembled = PETSC_FALSE; 120 if (roworiented) { 121 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 122 } else { 123 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 124 } 125 } 126 } 127 PetscFunctionReturn(0); 128 } 129 130 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 131 { 132 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 133 PetscErrorCode ierr; 134 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 135 136 PetscFunctionBegin; 137 for (i=0; i<m; i++) { 138 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 139 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 140 if (idxm[i] >= rstart && idxm[i] < rend) { 141 row = idxm[i] - rstart; 142 for (j=0; j<n; j++) { 143 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 144 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 145 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 146 } 147 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 148 } 149 PetscFunctionReturn(0); 150 } 151 152 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 153 { 154 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 155 PetscErrorCode ierr; 156 157 PetscFunctionBegin; 158 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 159 PetscFunctionReturn(0); 160 } 161 162 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar 
**array) 163 { 164 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 165 PetscErrorCode ierr; 166 167 PetscFunctionBegin; 168 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 169 PetscFunctionReturn(0); 170 } 171 172 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 183 { 184 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 185 PetscErrorCode ierr; 186 187 PetscFunctionBegin; 188 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 189 PetscFunctionReturn(0); 190 } 191 192 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 193 { 194 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 195 PetscErrorCode ierr; 196 197 PetscFunctionBegin; 198 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 199 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 203 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 204 { 205 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 206 PetscErrorCode ierr; 207 208 PetscFunctionBegin; 209 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 210 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 215 { 216 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 217 PetscErrorCode ierr; 218 219 PetscFunctionBegin; 220 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 221 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 static PetscErrorCode 
MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 226 { 227 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 228 PetscErrorCode ierr; 229 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 230 const PetscInt *irow,*icol; 231 const PetscScalar *v; 232 PetscScalar *bv; 233 Mat newmat; 234 IS iscol_local; 235 MPI_Comm comm_is,comm_mat; 236 237 PetscFunctionBegin; 238 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 239 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 240 if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 241 242 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 243 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 244 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 245 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 246 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 247 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 248 249 /* No parallel redistribution currently supported! Should really check each index set 250 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 251 original matrix! */ 252 253 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 254 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 255 256 /* Check submatrix call */ 257 if (scall == MAT_REUSE_MATRIX) { 258 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 259 /* Really need to test rows and column sizes! 
*/ 260 newmat = *B; 261 } else { 262 /* Create and fill new matrix */ 263 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 264 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 265 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 266 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 267 } 268 269 /* Now extract the data pointers and do the copy, column at a time */ 270 newmatd = (Mat_MPIDense*)newmat->data; 271 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 272 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 273 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 274 for (i=0; i<Ncols; i++) { 275 const PetscScalar *av = v + lda*icol[i]; 276 for (j=0; j<nrows; j++) { 277 *bv++ = av[irow[j] - rstart]; 278 } 279 } 280 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 281 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 282 283 /* Assemble the matrices so that the correct flags are set */ 284 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 285 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 286 287 /* Free work space */ 288 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 289 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 290 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 291 *B = newmat; 292 PetscFunctionReturn(0); 293 } 294 295 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 296 { 297 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 298 PetscErrorCode ierr; 299 300 PetscFunctionBegin; 301 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 306 { 307 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 308 PetscErrorCode ierr; 309 310 PetscFunctionBegin; 311 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 312 PetscFunctionReturn(0); 313 } 314 315 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 316 { 317 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 322 PetscFunctionReturn(0); 323 } 324 325 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 326 { 327 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 328 PetscErrorCode ierr; 329 PetscInt nstash,reallocs; 330 331 PetscFunctionBegin; 332 if (mdn->donotstash || mat->nooffprocentries) return(0); 333 334 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 335 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 336 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 341 { 342 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 343 PetscErrorCode ierr; 344 PetscInt i,*row,*col,flg,j,rstart,ncols; 345 PetscMPIInt n; 346 PetscScalar *val; 347 348 PetscFunctionBegin; 349 if (!mdn->donotstash && !mat->nooffprocentries) { 350 /* wait on receives */ 351 while (1) { 352 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 353 if (!flg) break; 354 355 for (i=0; i<n;) { 356 /* Now identify the consecutive vals belonging to the same row */ 357 for (j=i,rstart=row[j]; j<n; j++) { 358 if (row[j] != rstart) break; 359 } 360 if (j < n) ncols = j-i; 361 else ncols = n-i; 362 /* Now assemble all these values with a single function call */ 363 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 364 i = j; 365 } 366 } 367 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 368 } 369 370 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 371 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 372 373 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 
374 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 375 } 376 PetscFunctionReturn(0); 377 } 378 379 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 380 { 381 PetscErrorCode ierr; 382 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 383 384 PetscFunctionBegin; 385 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 390 { 391 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 392 PetscErrorCode ierr; 393 PetscInt i,len,*lrows; 394 395 PetscFunctionBegin; 396 /* get locally owned rows */ 397 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 398 /* fix right hand side if needed */ 399 if (x && b) { 400 const PetscScalar *xx; 401 PetscScalar *bb; 402 403 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 404 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 405 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 406 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 407 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 408 } 409 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 410 if (diag != 0.0) { 411 Vec d; 412 413 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 414 ierr = VecSet(d,diag);CHKERRQ(ierr); 415 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 416 ierr = VecDestroy(&d);CHKERRQ(ierr); 417 } 418 ierr = PetscFree(lrows);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 423 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 424 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 425 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 426 427 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 428 { 429 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 430 PetscErrorCode ierr; 431 const PetscScalar *ax; 432 PetscScalar *ay; 433 434 PetscFunctionBegin; 435 ierr = 
VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 436 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 437 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 438 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 439 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 440 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 441 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 442 PetscFunctionReturn(0); 443 } 444 445 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 446 { 447 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 448 PetscErrorCode ierr; 449 const PetscScalar *ax; 450 PetscScalar *ay; 451 452 PetscFunctionBegin; 453 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 454 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 455 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 456 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 457 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 458 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 459 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 464 { 465 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 466 PetscErrorCode ierr; 467 const PetscScalar *ax; 468 PetscScalar *ay; 469 470 PetscFunctionBegin; 471 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 472 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 473 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 474 ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 475 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 476 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 477 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 478 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 PetscErrorCode 
MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 483 { 484 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 485 PetscErrorCode ierr; 486 const PetscScalar *ax; 487 PetscScalar *ay; 488 489 PetscFunctionBegin; 490 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 491 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 492 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 493 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 494 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 495 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 496 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 497 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 502 { 503 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 504 PetscErrorCode ierr; 505 PetscInt lda,len,i,n,m = A->rmap->n,radd; 506 PetscScalar *x,zero = 0.0; 507 const PetscScalar *av; 508 509 PetscFunctionBegin; 510 ierr = VecSet(v,zero);CHKERRQ(ierr); 511 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 512 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 513 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 514 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 515 radd = A->rmap->rstart*m; 516 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 517 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 518 for (i=0; i<len; i++) { 519 x[i] = av[radd + i*lda + i]; 520 } 521 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 522 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatDestroy_MPIDense(Mat mat) 527 { 528 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 529 PetscErrorCode ierr; 530 531 PetscFunctionBegin; 532 #if defined(PETSC_USE_LOG) 533 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 534 #endif 535 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 536 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 537 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 538 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 539 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 540 541 ierr = PetscFree(mat->data);CHKERRQ(ierr); 542 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 543 544 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 545 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 546 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 547 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 548 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 549 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 550 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 551 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 552 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 553 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 554 #if defined(PETSC_HAVE_ELEMENTAL) 555 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 556 #endif 557 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 558 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 559 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 560 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 561 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 574 PetscFunctionReturn(0); 575 } 576 577 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 578 579 #include <petscdraw.h> 580 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 581 { 582 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 583 PetscErrorCode ierr; 584 PetscMPIInt rank = mdn->rank; 585 PetscViewerType vtype; 586 PetscBool iascii,isdraw; 587 PetscViewer sviewer; 588 PetscViewerFormat format; 589 590 
PetscFunctionBegin; 591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 593 if (iascii) { 594 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 597 MatInfo info; 598 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 599 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 600 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 601 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 602 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 603 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 604 PetscFunctionReturn(0); 605 } else if (format == PETSC_VIEWER_ASCII_INFO) { 606 PetscFunctionReturn(0); 607 } 608 } else if (isdraw) { 609 PetscDraw draw; 610 PetscBool isnull; 611 612 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 613 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 614 if (isnull) PetscFunctionReturn(0); 615 } 616 617 { 618 /* assemble the entire matrix onto first processor. */ 619 Mat A; 620 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 621 PetscInt *cols; 622 PetscScalar *vals; 623 624 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 625 if (!rank) { 626 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 627 } else { 628 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 629 } 630 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. 
*/ 631 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 632 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 633 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 634 635 /* Copy the matrix ... This isn't the most efficient means, 636 but it's quick for now */ 637 A->insertmode = INSERT_VALUES; 638 639 row = mat->rmap->rstart; 640 m = mdn->A->rmap->n; 641 for (i=0; i<m; i++) { 642 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 643 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 644 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 645 row++; 646 } 647 648 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 649 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 650 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 651 if (!rank) { 652 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 653 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 654 } 655 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 656 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 657 ierr = MatDestroy(&A);CHKERRQ(ierr); 658 } 659 PetscFunctionReturn(0); 660 } 661 662 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 663 { 664 PetscErrorCode ierr; 665 PetscBool iascii,isbinary,isdraw,issocket; 666 667 PetscFunctionBegin; 668 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 669 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 670 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 671 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 672 673 if (iascii || issocket || isdraw) { 674 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 675 } 
else if (isbinary) { 676 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 677 } 678 PetscFunctionReturn(0); 679 } 680 681 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 682 { 683 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 684 Mat mdn = mat->A; 685 PetscErrorCode ierr; 686 PetscLogDouble isend[5],irecv[5]; 687 688 PetscFunctionBegin; 689 info->block_size = 1.0; 690 691 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 692 693 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 694 isend[3] = info->memory; isend[4] = info->mallocs; 695 if (flag == MAT_LOCAL) { 696 info->nz_used = isend[0]; 697 info->nz_allocated = isend[1]; 698 info->nz_unneeded = isend[2]; 699 info->memory = isend[3]; 700 info->mallocs = isend[4]; 701 } else if (flag == MAT_GLOBAL_MAX) { 702 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 703 704 info->nz_used = irecv[0]; 705 info->nz_allocated = irecv[1]; 706 info->nz_unneeded = irecv[2]; 707 info->memory = irecv[3]; 708 info->mallocs = irecv[4]; 709 } else if (flag == MAT_GLOBAL_SUM) { 710 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 711 712 info->nz_used = irecv[0]; 713 info->nz_allocated = irecv[1]; 714 info->nz_unneeded = irecv[2]; 715 info->memory = irecv[3]; 716 info->mallocs = irecv[4]; 717 } 718 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 719 info->fill_ratio_needed = 0; 720 info->factor_mallocs = 0; 721 PetscFunctionReturn(0); 722 } 723 724 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 725 { 726 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 727 PetscErrorCode ierr; 728 729 PetscFunctionBegin; 730 switch (op) { 731 case MAT_NEW_NONZERO_LOCATIONS: 732 case MAT_NEW_NONZERO_LOCATION_ERR: 733 case MAT_NEW_NONZERO_ALLOCATION_ERR: 734 MatCheckPreallocated(A,1); 735 ierr = 
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 736 break; 737 case MAT_ROW_ORIENTED: 738 MatCheckPreallocated(A,1); 739 a->roworiented = flg; 740 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 741 break; 742 case MAT_NEW_DIAGONALS: 743 case MAT_KEEP_NONZERO_PATTERN: 744 case MAT_USE_HASH_TABLE: 745 case MAT_SORTED_FULL: 746 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 747 break; 748 case MAT_IGNORE_OFF_PROC_ENTRIES: 749 a->donotstash = flg; 750 break; 751 case MAT_SYMMETRIC: 752 case MAT_STRUCTURALLY_SYMMETRIC: 753 case MAT_HERMITIAN: 754 case MAT_SYMMETRY_ETERNAL: 755 case MAT_IGNORE_LOWER_TRIANGULAR: 756 case MAT_IGNORE_ZERO_ENTRIES: 757 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 758 break; 759 default: 760 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 761 } 762 PetscFunctionReturn(0); 763 } 764 765 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 766 { 767 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 768 const PetscScalar *l; 769 PetscScalar x,*v,*vv,*r; 770 PetscErrorCode ierr; 771 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 772 773 PetscFunctionBegin; 774 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 775 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 776 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 777 if (ll) { 778 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 779 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 780 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 781 for (i=0; i<m; i++) { 782 x = l[i]; 783 v = vv + i; 784 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 785 } 786 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 787 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 788 } 789 if (rr) { 790 const PetscScalar *ar; 791 792 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 793 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec 
non-conforming local size, %d != %d.", s3a, s3); 794 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 795 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 796 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 797 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 798 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 799 for (i=0; i<n; i++) { 800 x = r[i]; 801 v = vv + i*lda; 802 for (j=0; j<m; j++) (*v++) *= x; 803 } 804 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 805 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 806 } 807 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 808 PetscFunctionReturn(0); 809 } 810 811 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 812 { 813 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 814 PetscErrorCode ierr; 815 PetscInt i,j; 816 PetscReal sum = 0.0; 817 const PetscScalar *av,*v; 818 819 PetscFunctionBegin; 820 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 821 v = av; 822 if (mdn->size == 1) { 823 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 824 } else { 825 if (type == NORM_FROBENIUS) { 826 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 827 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 828 } 829 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 830 *nrm = PetscSqrtReal(*nrm); 831 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 832 } else if (type == NORM_1) { 833 PetscReal *tmp,*tmp2; 834 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 835 *nrm = 0.0; 836 v = av; 837 for (j=0; j<mdn->A->cmap->n; j++) { 838 for (i=0; i<mdn->A->rmap->n; i++) { 839 tmp[j] += PetscAbsScalar(*v); v++; 840 } 841 } 842 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 843 for (j=0; j<A->cmap->N; j++) { 844 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 845 } 846 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 847 ierr = 
PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 848 } else if (type == NORM_INFINITY) { /* max row norm */ 849 PetscReal ntemp; 850 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 851 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 852 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 853 } 854 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 859 { 860 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 861 Mat B; 862 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 863 PetscErrorCode ierr; 864 PetscInt j,i,lda; 865 PetscScalar *v; 866 867 PetscFunctionBegin; 868 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 869 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 870 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 871 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 872 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 873 } else B = *matout; 874 875 m = a->A->rmap->n; n = a->A->cmap->n; 876 ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 877 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 878 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 879 for (i=0; i<m; i++) rwork[i] = rstart + i; 880 for (j=0; j<n; j++) { 881 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 882 v += lda; 883 } 884 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 885 ierr = PetscFree(rwork);CHKERRQ(ierr); 886 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 889 *matout = B; 890 } else { 891 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 892 } 893 PetscFunctionReturn(0); 894 } 895 
/* Forward declarations of routines defined later in this file */
static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* Sets up row/column layouts and triggers default preallocation if none was done. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y += alpha*X, delegated to the local sequential blocks; layouts are assumed
   compatible (not checked here). */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Complex-conjugates all entries in place (local block holds all columns). */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keeps only the real part of every entry, delegated to the local block. */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keeps only the imaginary part of every entry, delegated to the local block. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts global column col into v by forwarding to the local block's
   getcolumnvector implementation (every process stores all columns of its
   row block, so no communication is needed). */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

/* Column norms: local column norms are computed sequentially, then reduced
   over the communicator (MAX for infinity norm, SUM otherwise; 2-norms are
   squared before the reduction and square-rooted after). */
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];   /* sum of squares reduces correctly */
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Exposes column col of the local block as a CUDA vector; only one column
   may be checked out at a time (tracked via a->vecinuse). */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily created wrapper vector; its array is placed per call */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;   /* +1 so that column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the column vector obtained with MatDenseGetColumnVec_MPIDenseCUDA. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: additionally locks the wrapper vector for reading. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Restores a read-only column vector; order matters: restore array, pop the
   read lock, then reset the placed array. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: previous column contents need not be valid on device. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns a write-only column vector. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* The following thin wrappers forward MatDenseCUDA array management to the
   local sequential block; the checkout guards prevent swapping the array
   while a column vector is outstanding. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l =
(Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Host (non-CUDA) column-vector accessors, defined later in this file */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);

/* Switches between host and device column-vector accessors by (re)composing
   the MatDense* function slots, and forwards the bind request to the local
   block.  The cached wrapper vector is destroyed if it is not of the type
   required by the new binding. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop a host-type cached column vector so it is recreated as CUDA */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocates the local SEQDENSECUDA block, optionally wrapping the
   user-provided device array d_data.  A no-op for non-CUDA matrices. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    /* local block: all N columns of the owned row block */
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fills the local block with random values. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

/* A dense matrix never has a missing diagonal entry.  NOTE(review): d is left
   untouched; per the MatMissingDiagonal convention it is meaningful only when
   *missing is PETSC_TRUE. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
/* Positional table of Mat operations for MATMPIDENSE; slot numbers in the
   comments index into struct _MatOps.  Do not reorder. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

/* Preallocates (or wraps the user array `data` as) the local sequential
   block; the block type follows the parallel type (CUDA or host). */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr =
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    /* local block holds all N columns of the owned rows */
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Converts an MPIDENSE matrix to Elemental format by inserting all locally
   owned rows (full column width) into the Elemental matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Exposes a pointer to global column col of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Returns a column pointer obtained with MatDenseGetColumn_MPIDense. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Stacks per-process sequential dense matrices (one per rank) into a single
   parallel dense matrix on comm; symbolic phase creates the result, numeric
   phase copies the local block in.
   NOTE(review): nloc is queried but never used afterwards — looks vestigial;
   confirm before removing. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Converts MATMPIDENSECUDA back to host MATMPIDENSE: rebinds to CPU,
   restores the host default vector type, and unregisters the CUDA-only
   composed functions (the registration strings must match those installed
   in MatConvert_MPIDense_MPIDenseCUDA exactly). */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Converts host MATMPIDENSE to MATMPIDENSECUDA: installs CUDA default vector
   type, registers the CUDA array accessors and product hooks, converts the
   local block, and binds to the GPU. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Host-side column-vector accessors; same checkout protocol as the CUDA
   variants above (one column at a time, tracked via a->vecinuse). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: additionally read-locks the wrapper vector. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Constructor for MATMPIDENSE: allocates the data structure, installs the
   operations table and the composed MatDense* functions. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr =
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1742 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1743 1744 /* build cache for off array entries formed */ 1745 a->donotstash = PETSC_FALSE; 1746 1747 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1748 1749 /* stuff used for matrix vector multiply */ 1750 a->lvec = 0; 1751 a->Mvctx = 0; 1752 a->roworiented = PETSC_TRUE; 1753 1754 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1755 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1756 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 1760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1761 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1762 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1763 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 1764 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1765 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1766 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1767 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1768 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1769 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1770 #if defined(PETSC_HAVE_ELEMENTAL) 1771 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1772 #endif 1773 #if defined(PETSC_HAVE_CUDA) 1774 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1775 #endif 1776 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1777 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1779 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1780 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr); 1782 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr); 1783 1784 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1785 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1786 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1788 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 /*MC 1793 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1794 1795 Options Database Keys: 1796 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1797 1798 Level: beginner 1799 1800 .seealso: 1801 1802 M*/ 1803 #if defined(PETSC_HAVE_CUDA) 1804 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1805 { 1806 PetscErrorCode ierr; 1807 1808 PetscFunctionBegin; 1809 ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1810 ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1811 PetscFunctionReturn(0); 1812 } 1813 #endif 1814 1815 /*MC 1816 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1817 1818 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1819 and MATMPIDENSE otherwise. 1820 1821 Options Database Keys: 1822 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* PetscTryMethod: silently a no-op when B is not of a type that registered the method */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the user-provided array lives on the host */
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.
mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the user-provided array lives on the host */
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the user-provided array lives on the device */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the user-provided array lives on the device */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2074 2075 Level: developer 2076 2077 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2078 @*/ 2079 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2080 { 2081 PetscErrorCode ierr; 2082 2083 PetscFunctionBegin; 2084 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2085 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2086 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 /*@C 2091 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2092 2093 Not Collective 2094 2095 Input Parameters: 2096 + A - the matrix 2097 - array - the GPU array in column major order 2098 2099 Notes: 2100 2101 Level: developer 2102 2103 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2104 @*/ 2105 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2106 { 2107 PetscErrorCode ierr; 2108 2109 PetscFunctionBegin; 2110 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2111 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2112 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2113 A->offloadmask = PETSC_OFFLOAD_GPU; 2114 PetscFunctionReturn(0); 2115 } 2116 2117 /*@C 2118 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2119 2120 Not Collective 2121 2122 Input Parameters: 2123 . A - the matrix 2124 2125 Output Parameters 2126 . array - the GPU array in column major order 2127 2128 Notes: 2129 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2130 2131 Level: developer 2132 2133 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2134 @*/ 2135 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2136 { 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2141 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /*@C 2146 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2147 2148 Not Collective 2149 2150 Input Parameters: 2151 + A - the matrix 2152 - array - the GPU array in column major order 2153 2154 Notes: 2155 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2156 2157 Level: developer 2158 2159 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2160 @*/ 2161 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2162 { 2163 PetscErrorCode ierr; 2164 2165 PetscFunctionBegin; 2166 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /*@C 2171 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2172 2173 Not Collective 2174 2175 Input Parameters: 2176 . A - the matrix 2177 2178 Output Parameters 2179 . array - the GPU array in column major order 2180 2181 Notes: 2182 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU; /* the up-to-date data now lives on the device */
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.
M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6); /* data must be NULL or non-NULL on all ranks */
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6); /* data must be NULL or non-NULL on all ranks */
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicates an MPI dense matrix: creates a new matrix of the same type and layout,
   duplicates the local dense block (copying values or not according to cpvalues),
   and rebuilds the structures used by matrix-vector multiply */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads an MPI dense matrix from a binary or (when available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Compares the local blocks and reduces the result with a logical AND over the communicator */
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool
*flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* all ranks must agree for the matrices to be equal */
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroys the A^T*B product data attached to C, then calls the destroy routine
   that was in place before the symbolic phase replaced it */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = (*atb->destroy)(A);CHKERRQ(ierr); /* chained original destroy routine */
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroys the A*B^T product data attached to C, then calls the destroy routine
   that was in place before the symbolic phase replaced it */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr); /* chained original destroy routine */
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the full-size
   local product aseq^T * bseq, packs it by owner rank, and reduce-scatters so each
   rank sums and receives exactly its owned rows of C */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*sendbuf=atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T * B: sets up C as MATMPIDENSE, allocates the communication
   buffers reused by the numeric phase, and hooks the custom destroy routine */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  c               = (Mat_MPIDense*)C->data;
  c->atbdense     = atb;
  atb->destroy    = C->ops->destroy; /* chain the current destroy so it can be restored */
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A * B^T: chooses the "allgatherv" (alg 0) or "cyclic" (alg 1)
   communication scheme, allocates the buffers it needs, and hooks the numeric and
   destroy routines */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two buffers sized for the largest local row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — send buffer for the local block, receive buffer for all of B */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c               = (Mat_MPIDense*)C->data;
  c->abtdense     = abt;
  abt->destroy    = C->ops->destroy; /* chain the current destroy so it can be restored */
  C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A * B^T with the cyclic scheme: B's local rows are rotated
   around the ring of ranks; each step multiplies against the block just received */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode ierr;
  MPI_Comm comm;
  PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar *sendbuf, *recvbuf=0, *cv;
  PetscInt i,cK=A->cmap->N,k,j,bn;        /* cK: shared (global) column count of A and B */
  PetscScalar _DOne=1.0,_DZero=0.0;        /* GEMM alpha/beta */
  const PetscScalar *av,*bv;
  PetscBLASInt cm, cn, ck, alda, blda, clda;
  MPI_Request reqs[2];
  const PetscInt *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the local dense blocks, narrowed to BLAS ints */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn = B->rmap->n;
  if (blda == bn) {
    /* B's local block is contiguous: send it in place (cast away const; the buffer is only read) */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact B's local block (strip the LDA padding) into the first ping-pong buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send "left", receive from "right" */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;  /* the block currently held originated on this rank */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      /* alternate the two ping-pong buffers so the GEMM below can read sendbuf while recvbuf fills */
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T: writes the column block of C owned by recvisfrom's rows of B */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Numeric phase of C = A*B^T via MPI_Allgatherv: every rank packs the transpose
   of its local B block, the packs are gathered everywhere, then one local GEMM
   forms the owned rows of C. Continues on the following lines.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode ierr;
  MPI_Comm comm;
  PetscMPIInt size;
  PetscScalar *cv, *sendbuf, *recvbuf;
  const PetscScalar *av,*bv;
  PetscInt blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar _DOne=1.0,_DZero=0.0;
  PetscBLASInt cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2664 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2665 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2666 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2667 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2668 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2669 /* copy transpose of B into buf[0] */ 2670 bn = B->rmap->n; 2671 sendbuf = abt->buf[0]; 2672 recvbuf = abt->buf[1]; 2673 for (k = 0, j = 0; j < bn; j++) { 2674 for (i = 0; i < cK; i++, k++) { 2675 sendbuf[k] = bv[i * blda + j]; 2676 } 2677 } 2678 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2679 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2680 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2681 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2682 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2683 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2684 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2685 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2686 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2691 { 2692 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2693 Mat_MatTransMultDense *abt = c->abtdense; 2694 PetscErrorCode ierr; 2695 2696 PetscFunctionBegin; 2697 switch (abt->alg) { 2698 case 1: 2699 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2700 break; 2701 default: 2702 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2703 break; 2704 } 2705 PetscFunctionReturn(0); 2706 } 2707 2708 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2709 { 2710 PetscErrorCode ierr; 2711 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2712 
Mat_MatMultDense *ab = a->abdense; 2713 2714 PetscFunctionBegin; 2715 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2716 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2717 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2718 2719 ierr = (ab->destroy)(A);CHKERRQ(ierr); 2720 ierr = PetscFree(ab);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #if defined(PETSC_HAVE_ELEMENTAL) 2725 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2726 { 2727 PetscErrorCode ierr; 2728 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2729 Mat_MatMultDense *ab=c->abdense; 2730 2731 PetscFunctionBegin; 2732 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2733 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2734 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2735 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2740 { 2741 PetscErrorCode ierr; 2742 Mat Ae,Be,Ce; 2743 Mat_MPIDense *c; 2744 Mat_MatMultDense *ab; 2745 2746 PetscFunctionBegin; 2747 /* check local size of A and B */ 2748 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2749 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2750 } 2751 2752 /* create elemental matrices Ae and Be */ 2753 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2754 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2755 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2756 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2757 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2758 2759 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 
  ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Be);CHKERRQ(ierr);
  ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  /* compute symbolic Ce = Ae*Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
  ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);

  /* setup C */
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse Cdense */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c = (Mat_MPIDense*)C->data;
  c->abdense = ab;

  /* cache the Elemental triple; ownership passes to C's destroy hook */
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;
  ab->destroy = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  C->ops->productnumeric = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
#endif
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
/* Product setup for C = A*B: installs the Elemental-backed symbolic/numeric ops. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->productnumeric = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
#endif

/* Symbolic driver for C = A^T*B: delegates to the MPIDense kernel and wires the numeric ops. */
static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
  C->ops->productnumeric = MatProductNumeric_AtB;
  C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Product setup for C = A^T*B: validates matching row layouts, then installs the symbolic op. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat A = product->A,B=product->B;

  PetscFunctionBegin;
  /* A^T*B requires A and B to share the same row ownership ranges */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
  PetscFunctionReturn(0);
}

/*
   Product setup for C = A*B^t: chooses the communication algorithm
   ("allgatherv" by default, "cyclic" optionally) from the product settings
   and runtime options, then installs the ABt symbolic/numeric ops.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product *product = C->product;
  const char *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt alg,nalg = 2;
  PetscBool flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name differs depending on which API entry point was used */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr =
MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_ABt;
  C->ops->productnumeric = MatProductNumeric_ABt;
  PetscFunctionReturn(0);
}

/*
   Top-level MatProduct dispatcher for MPIDense*MPIDense products: routes to
   the per-type setup (AB requires PETSc built with Elemental; AtB and ABt are
   native) and errors out on unsupported product types.
*/
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}