/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
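        /*
           Usage sketch (illustration only, not part of this implementation): how a caller can
           reach the process-local storage described by MatDenseGetLocalMatrix() above. The
           matrix X below is a hypothetical, already assembled MATMPIDENSE matrix, and the sketch
           assumes the default layout where the local array is column major with a leading
           dimension equal to the number of local rows.

               Mat         X,Xloc;
               PetscScalar *v;
               PetscInt    mlocal,Nglobal;

               ierr = MatDenseGetLocalMatrix(X,&Xloc);CHKERRQ(ierr);    sequential matrix holding the local rows
               ierr = MatGetLocalSize(X,&mlocal,NULL);CHKERRQ(ierr);
               ierr = MatGetSize(X,NULL,&Nglobal);CHKERRQ(ierr);
               ierr = MatDenseGetArray(X,&v);CHKERRQ(ierr);             v[i + j*mlocal] is local row i, global column j
               ierr = MatDenseRestoreArray(X,&v);CHKERRQ(ierr);
        */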
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! Each index set should really be checked
     to confirm that it is OK. ... Currently only a submatrix with the same partitioning as the
     original matrix is supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes!
*/ 187 newmat = *B; 188 } else { 189 /* Create and fill new matrix */ 190 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 191 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 192 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 193 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 194 } 195 196 /* Now extract the data pointers and do the copy, column at a time */ 197 newmatd = (Mat_MPIDense*)newmat->data; 198 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 199 200 for (i=0; i<Ncols; i++) { 201 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 202 for (j=0; j<nrows; j++) { 203 *bv++ = av[irow[j] - rstart]; 204 } 205 } 206 207 /* Assemble the matrices so that the correct flags are set */ 208 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 209 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 210 211 /* Free work space */ 212 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 213 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 214 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 215 *B = newmat; 216 PetscFunctionReturn(0); 217 } 218 219 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 220 { 221 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 222 PetscErrorCode ierr; 223 224 PetscFunctionBegin; 225 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 229 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 230 { 231 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 232 MPI_Comm comm; 233 PetscErrorCode ierr; 234 PetscInt nstash,reallocs; 235 InsertMode addv; 236 237 PetscFunctionBegin; 238 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 239 /* make sure all processors are either in INSERTMODE or ADDMODE */ 240 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 241 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 242 mat->insertmode = addv; /* in case this processor had no cache */ 243 244 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 245 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 246 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 251 { 252 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 253 PetscErrorCode ierr; 254 PetscInt i,*row,*col,flg,j,rstart,ncols; 255 PetscMPIInt n; 256 PetscScalar *val; 257 258 PetscFunctionBegin; 259 /* wait on receives */ 260 while (1) { 261 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 262 if (!flg) break; 263 264 for (i=0; i<n;) { 265 /* Now identify the consecutive vals belonging to the same row */ 266 for (j=i,rstart=row[j]; j<n; j++) { 267 if (row[j] != rstart) break; 268 } 269 if (j < n) ncols = j-i; 270 else ncols = n-i; 271 /* Now assemble all these values with a single function call */ 272 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 273 i = j; 274 } 275 } 276 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 277 278 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 279 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 280 281 if (!mat->was_assembled && mode == 
MAT_FINAL_ASSEMBLY) { 282 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 283 } 284 PetscFunctionReturn(0); 285 } 286 287 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 288 { 289 PetscErrorCode ierr; 290 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 291 292 PetscFunctionBegin; 293 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 294 PetscFunctionReturn(0); 295 } 296 297 /* the code does not do the diagonal entries correctly unless the 298 matrix is square and the column and row owerships are identical. 299 This is a BUG. The only way to fix it seems to be to access 300 mdn->A and mdn->B directly and not through the MatZeroRows() 301 routine. 302 */ 303 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 304 { 305 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 306 PetscErrorCode ierr; 307 PetscInt i,*owners = A->rmap->range; 308 PetscInt *sizes,j,idx,nsends; 309 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 310 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 311 PetscInt *lens,*lrows,*values; 312 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 313 MPI_Comm comm; 314 MPI_Request *send_waits,*recv_waits; 315 MPI_Status recv_status,*send_status; 316 PetscBool found; 317 const PetscScalar *xx; 318 PetscScalar *bb; 319 320 PetscFunctionBegin; 321 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 322 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 323 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 324 /* first count number of contributors to each processor */ 325 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 326 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 327 for (i=0; i<N; i++) { 328 idx = rows[i]; 329 found = PETSC_FALSE; 330 for (j=0; j<size; j++) { 331 if (idx >= owners[j] && idx < owners[j+1]) { 332 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 333 } 334 } 335 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 336 } 337 nsends = 0; 338 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 339 340 /* inform other processors of number of messages and max length*/ 341 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 342 343 /* post receives: */ 344 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 345 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 346 for (i=0; i<nrecvs; i++) { 347 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 348 } 349 350 /* do sends: 351 1) starts[i] gives the starting index in svalues for stuff going to 352 the ith processor 353 */ 354 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 355 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 356 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 357 358 starts[0] = 0; 359 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 360 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 361 362 starts[0] = 0; 363 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 364 count = 0; 365 for (i=0; i<size; i++) { 366 if (sizes[2*i+1]) { 367 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 368 } 369 } 370 ierr = PetscFree(starts);CHKERRQ(ierr); 371 372 base = owners[rank]; 373 374 /* wait on receives */ 375 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 376 count = nrecvs; 
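  /*
     Usage sketch (illustration only): how MatZeroRows() is typically called on a MATMPIDENSE
     matrix. The names A, x and b are hypothetical; rows[] holds global row indices (they need
     not be owned by the calling process, since the routine routes them to their owners as done
     in the code above), diag is the value placed on the diagonal of each zeroed row, and the
     optional vectors let PETSc also adjust the right-hand side.

         PetscInt    rows[] = {0,5};
         PetscScalar diag   = 1.0;

         ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);          also sets b[rows[i]] = diag*x[rows[i]]
         ierr = MatZeroRows(A,2,rows,diag,NULL,NULL);CHKERRQ(ierr);    zero the rows only, no right-hand side update
  */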
377 slen = 0; 378 while (count) { 379 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 380 /* unpack receives into our local space */ 381 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 382 383 source[imdex] = recv_status.MPI_SOURCE; 384 lens[imdex] = n; 385 slen += n; 386 count--; 387 } 388 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 389 390 /* move the data into the send scatter */ 391 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 392 count = 0; 393 for (i=0; i<nrecvs; i++) { 394 values = rvalues + i*nmax; 395 for (j=0; j<lens[i]; j++) { 396 lrows[count++] = values[j] - base; 397 } 398 } 399 ierr = PetscFree(rvalues);CHKERRQ(ierr); 400 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 401 ierr = PetscFree(owner);CHKERRQ(ierr); 402 ierr = PetscFree(sizes);CHKERRQ(ierr); 403 404 /* fix right hand side if needed */ 405 if (x && b) { 406 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 407 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 408 for (i=0; i<slen; i++) { 409 bb[lrows[i]] = diag*xx[lrows[i]]; 410 } 411 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 412 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 413 } 414 415 /* actually zap the local rows */ 416 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 417 if (diag != 0.0) { 418 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 419 PetscInt m = ll->lda, i; 420 421 for (i=0; i<slen; i++) { 422 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 423 } 424 } 425 ierr = PetscFree(lrows);CHKERRQ(ierr); 426 427 /* wait on sends */ 428 if (nsends) { 429 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 430 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 431 ierr = PetscFree(send_status);CHKERRQ(ierr); 432 } 433 ierr = PetscFree(send_waits);CHKERRQ(ierr); 434 ierr = PetscFree(svalues);CHKERRQ(ierr); 435 PetscFunctionReturn(0); 436 } 437 438 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 439 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 440 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 441 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 442 443 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 444 { 445 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 446 PetscErrorCode ierr; 447 448 PetscFunctionBegin; 449 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 450 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 451 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 456 { 457 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 458 PetscErrorCode ierr; 459 460 PetscFunctionBegin; 461 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 462 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 463 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 468 { 469 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 470 PetscErrorCode ierr; 471 PetscScalar zero = 0.0; 472 473 PetscFunctionBegin; 474 ierr = VecSet(yy,zero);CHKERRQ(ierr); 475 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 476 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 477 ierr = 
VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 482 { 483 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 484 PetscErrorCode ierr; 485 486 PetscFunctionBegin; 487 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 488 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 489 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 490 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 494 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 495 { 496 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 497 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 498 PetscErrorCode ierr; 499 PetscInt len,i,n,m = A->rmap->n,radd; 500 PetscScalar *x,zero = 0.0; 501 502 PetscFunctionBegin; 503 ierr = VecSet(v,zero);CHKERRQ(ierr); 504 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 505 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 506 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 507 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 508 radd = A->rmap->rstart*m; 509 for (i=0; i<len; i++) { 510 x[i] = aloc->v[radd + i*m + i]; 511 } 512 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 513 PetscFunctionReturn(0); 514 } 515 516 PetscErrorCode MatDestroy_MPIDense(Mat mat) 517 { 518 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 519 PetscErrorCode ierr; 520 521 PetscFunctionBegin; 522 #if defined(PETSC_USE_LOG) 523 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 524 #endif 525 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 526 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 527 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 528 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 529 530 ierr = PetscFree(mat->data);CHKERRQ(ierr); 531 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 532 533 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 534 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 535 #if defined(PETSC_HAVE_ELEMENTAL) 536 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 537 #endif 538 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 539 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 540 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 541 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 542 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 543 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 544 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 549 { 550 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 551 PetscErrorCode ierr; 552 PetscViewerFormat format; 553 int fd; 554 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 555 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 556 PetscScalar *work,*v,*vv; 557 
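  /*
     Usage sketch (illustration only) for the multiplication kernels above: vectors that conform
     to the row and column layouts of a MATMPIDENSE matrix are easiest to obtain from
     MatCreateVecs(). The matrix A below is hypothetical and assumed assembled.

         Vec x,y;

         ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);       x conforms to the columns, y to the rows
         ierr = MatMult(A,x,y);CHKERRQ(ierr);               y = A*x, uses the scatter set up in MatSetUpMultiply_MPIDense()
         ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);      x = A^T*y, reverse scatter with ADD_VALUES
         ierr = VecDestroy(&x);CHKERRQ(ierr);
         ierr = VecDestroy(&y);CHKERRQ(ierr);
  */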
Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 558 559 PetscFunctionBegin; 560 if (mdn->size == 1) { 561 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 562 } else { 563 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 564 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 565 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 566 567 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 568 if (format == PETSC_VIEWER_NATIVE) { 569 570 if (!rank) { 571 /* store the matrix as a dense matrix */ 572 header[0] = MAT_FILE_CLASSID; 573 header[1] = mat->rmap->N; 574 header[2] = N; 575 header[3] = MATRIX_BINARY_FORMAT_DENSE; 576 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 577 578 /* get largest work array needed for transposing array */ 579 mmax = mat->rmap->n; 580 for (i=1; i<size; i++) { 581 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 582 } 583 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 584 585 /* write out local array, by rows */ 586 m = mat->rmap->n; 587 v = a->v; 588 for (j=0; j<N; j++) { 589 for (i=0; i<m; i++) { 590 work[j + i*N] = *v++; 591 } 592 } 593 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 594 /* get largest work array to receive messages from other processes, excludes process zero */ 595 mmax = 0; 596 for (i=1; i<size; i++) { 597 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 598 } 599 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 600 for (k = 1; k < size; k++) { 601 v = vv; 602 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 603 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 604 605 for (j = 0; j < N; j++) { 606 for (i = 0; i < m; i++) { 607 work[j + i*N] = *v++; 608 } 609 } 610 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 611 } 612 ierr = PetscFree(work);CHKERRQ(ierr); 613 ierr = PetscFree(vv);CHKERRQ(ierr); 614 } else { 615 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 616 } 617 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 618 } 619 PetscFunctionReturn(0); 620 } 621 622 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 623 #include <petscdraw.h> 624 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 625 { 626 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 627 PetscErrorCode ierr; 628 PetscMPIInt rank = mdn->rank; 629 PetscViewerType vtype; 630 PetscBool iascii,isdraw; 631 PetscViewer sviewer; 632 PetscViewerFormat format; 633 634 PetscFunctionBegin; 635 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 636 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 637 if (iascii) { 638 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 639 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 640 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 641 MatInfo info; 642 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 643 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 644 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 645 ierr = 
PetscViewerFlush(viewer);CHKERRQ(ierr); 646 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 647 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 648 PetscFunctionReturn(0); 649 } else if (format == PETSC_VIEWER_ASCII_INFO) { 650 PetscFunctionReturn(0); 651 } 652 } else if (isdraw) { 653 PetscDraw draw; 654 PetscBool isnull; 655 656 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 657 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 658 if (isnull) PetscFunctionReturn(0); 659 } 660 661 { 662 /* assemble the entire matrix onto first processor. */ 663 Mat A; 664 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 665 PetscInt *cols; 666 PetscScalar *vals; 667 668 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 669 if (!rank) { 670 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 671 } else { 672 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 673 } 674 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 675 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 676 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 677 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 678 679 /* Copy the matrix ... This isn't the most efficient means, 680 but it's quick for now */ 681 A->insertmode = INSERT_VALUES; 682 683 row = mat->rmap->rstart; 684 m = mdn->A->rmap->n; 685 for (i=0; i<m; i++) { 686 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 687 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 688 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 689 row++; 690 } 691 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 695 if (!rank) { 696 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 697 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 698 } 699 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 701 ierr = MatDestroy(&A);CHKERRQ(ierr); 702 } 703 PetscFunctionReturn(0); 704 } 705 706 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 707 { 708 PetscErrorCode ierr; 709 PetscBool iascii,isbinary,isdraw,issocket; 710 711 PetscFunctionBegin; 712 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 713 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 714 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 715 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 716 717 if (iascii || issocket || isdraw) { 718 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 719 } else if (isbinary) { 720 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 721 } 722 PetscFunctionReturn(0); 723 } 724 725 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 726 { 727 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 728 Mat mdn = mat->A; 729 PetscErrorCode ierr; 730 PetscReal isend[5],irecv[5]; 731 732 PetscFunctionBegin; 733 info->block_size = 1.0; 734 735 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 736 737 isend[0] = info->nz_used; isend[1] = info->nz_allocated; 
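  /*
     Usage sketch (illustration only) for the binary viewer handled by MatView_MPIDense_Binary()
     above: a parallel dense matrix can only be stored after pushing the native format, as the
     error message in that routine states. The file name and the matrix A are hypothetical.

         PetscViewer viewer;

         ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
         ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
         ierr = MatView(A,viewer);CHKERRQ(ierr);
         ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
         ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     The file can later be read back by creating a MATDENSE matrix and calling MatLoad(), which
     ends up in MatLoad_MPIDense() below.
  */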
isend[2] = info->nz_unneeded; 738 isend[3] = info->memory; isend[4] = info->mallocs; 739 if (flag == MAT_LOCAL) { 740 info->nz_used = isend[0]; 741 info->nz_allocated = isend[1]; 742 info->nz_unneeded = isend[2]; 743 info->memory = isend[3]; 744 info->mallocs = isend[4]; 745 } else if (flag == MAT_GLOBAL_MAX) { 746 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 747 748 info->nz_used = irecv[0]; 749 info->nz_allocated = irecv[1]; 750 info->nz_unneeded = irecv[2]; 751 info->memory = irecv[3]; 752 info->mallocs = irecv[4]; 753 } else if (flag == MAT_GLOBAL_SUM) { 754 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 755 756 info->nz_used = irecv[0]; 757 info->nz_allocated = irecv[1]; 758 info->nz_unneeded = irecv[2]; 759 info->memory = irecv[3]; 760 info->mallocs = irecv[4]; 761 } 762 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 763 info->fill_ratio_needed = 0; 764 info->factor_mallocs = 0; 765 PetscFunctionReturn(0); 766 } 767 768 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 769 { 770 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 771 PetscErrorCode ierr; 772 773 PetscFunctionBegin; 774 switch (op) { 775 case MAT_NEW_NONZERO_LOCATIONS: 776 case MAT_NEW_NONZERO_LOCATION_ERR: 777 case MAT_NEW_NONZERO_ALLOCATION_ERR: 778 MatCheckPreallocated(A,1); 779 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 780 break; 781 case MAT_ROW_ORIENTED: 782 MatCheckPreallocated(A,1); 783 a->roworiented = flg; 784 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 785 break; 786 case MAT_NEW_DIAGONALS: 787 case MAT_KEEP_NONZERO_PATTERN: 788 case MAT_USE_HASH_TABLE: 789 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 790 break; 791 case MAT_IGNORE_OFF_PROC_ENTRIES: 792 a->donotstash = flg; 793 break; 794 case MAT_SYMMETRIC: 795 case MAT_STRUCTURALLY_SYMMETRIC: 796 case MAT_HERMITIAN: 797 case MAT_SYMMETRY_ETERNAL: 798 case MAT_IGNORE_LOWER_TRIANGULAR: 799 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 800 break; 801 default: 802 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 803 } 804 PetscFunctionReturn(0); 805 } 806 807 808 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 809 { 810 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 811 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 812 PetscScalar *l,*r,x,*v; 813 PetscErrorCode ierr; 814 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 815 816 PetscFunctionBegin; 817 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 818 if (ll) { 819 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 820 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 821 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 822 for (i=0; i<m; i++) { 823 x = l[i]; 824 v = mat->v + i; 825 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 826 } 827 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 828 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 829 } 830 if (rr) { 831 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 832 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 833 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 836 for (i=0; i<n; i++) { 837 
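      /*
         Usage sketch (illustration only) for MatDiagonalScale() on a MATMPIDENSE matrix: the
         left vector must conform to the row layout and the right vector to the column layout,
         which is what MatCreateVecs() provides. A, l and r are hypothetical.

             Vec r,l;

             ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
             ierr = VecSet(l,2.0);CHKERRQ(ierr);
             ierr = VecSet(r,0.5);CHKERRQ(ierr);
             ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);    A <- diag(l) * A * diag(r)
             ierr = VecDestroy(&r);CHKERRQ(ierr);
             ierr = VecDestroy(&l);CHKERRQ(ierr);
      */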
x = r[i]; 838 v = mat->v + i*m; 839 for (j=0; j<m; j++) (*v++) *= x; 840 } 841 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 842 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 843 } 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 848 { 849 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 850 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 851 PetscErrorCode ierr; 852 PetscInt i,j; 853 PetscReal sum = 0.0; 854 PetscScalar *v = mat->v; 855 856 PetscFunctionBegin; 857 if (mdn->size == 1) { 858 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 859 } else { 860 if (type == NORM_FROBENIUS) { 861 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 862 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 863 } 864 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 865 *nrm = PetscSqrtReal(*nrm); 866 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 867 } else if (type == NORM_1) { 868 PetscReal *tmp,*tmp2; 869 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 870 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 871 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 872 *nrm = 0.0; 873 v = mat->v; 874 for (j=0; j<mdn->A->cmap->n; j++) { 875 for (i=0; i<mdn->A->rmap->n; i++) { 876 tmp[j] += PetscAbsScalar(*v); v++; 877 } 878 } 879 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 880 for (j=0; j<A->cmap->N; j++) { 881 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 882 } 883 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 884 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 885 } else if (type == NORM_INFINITY) { /* max row norm */ 886 PetscReal ntemp; 887 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 888 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 889 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 890 } 891 PetscFunctionReturn(0); 892 } 893 894 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 895 { 896 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 897 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 898 Mat B; 899 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 900 PetscErrorCode ierr; 901 PetscInt j,i; 902 PetscScalar *v; 903 904 PetscFunctionBegin; 905 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 906 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 907 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 908 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 909 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 910 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 911 } else { 912 B = *matout; 913 } 914 915 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 916 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 917 for (i=0; i<m; i++) rwork[i] = rstart + i; 918 for (j=0; j<n; j++) { 919 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 920 v += m; 921 } 922 ierr = PetscFree(rwork);CHKERRQ(ierr); 923 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 924 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 925 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 926 *matout = B; 927 } else { 928 ierr = 
MatHeaderMerge(A,&B);CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 934 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 935 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 936 937 PetscErrorCode MatSetUp_MPIDense(Mat A) 938 { 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 947 { 948 PetscErrorCode ierr; 949 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 950 951 PetscFunctionBegin; 952 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 953 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 PetscErrorCode MatConjugate_MPIDense(Mat mat) 958 { 959 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 960 PetscErrorCode ierr; 961 962 PetscFunctionBegin; 963 ierr = MatConjugate(a->A);CHKERRQ(ierr); 964 PetscFunctionReturn(0); 965 } 966 967 PetscErrorCode MatRealPart_MPIDense(Mat A) 968 { 969 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 ierr = MatRealPart(a->A);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 978 { 979 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 988 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 989 { 990 PetscErrorCode ierr; 991 PetscInt i,n; 992 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 993 PetscReal *work; 994 995 PetscFunctionBegin; 996 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 997 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 998 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 999 if (type == NORM_2) { 1000 for (i=0; i<n; i++) work[i] *= work[i]; 1001 } 1002 if (type == NORM_INFINITY) { 1003 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1004 } else { 1005 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(work);CHKERRQ(ierr); 1008 if (type == NORM_2) { 1009 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1015 { 1016 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1017 PetscErrorCode ierr; 1018 PetscScalar *a; 1019 PetscInt m,n,i; 1020 1021 PetscFunctionBegin; 1022 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1023 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1024 for (i=0; i<m*n; i++) { 1025 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1026 } 1027 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1032 1033 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1034 { 1035 PetscFunctionBegin; 1036 *missing = PETSC_FALSE; 1037 PetscFunctionReturn(0); 1038 } 1039 1040 /* -------------------------------------------------------------------*/ 1041 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1042 MatGetRow_MPIDense, 1043 MatRestoreRow_MPIDense, 1044 MatMult_MPIDense, 1045 /* 4*/ MatMultAdd_MPIDense, 1046 
MatMultTranspose_MPIDense, 1047 MatMultTransposeAdd_MPIDense, 1048 0, 1049 0, 1050 0, 1051 /* 10*/ 0, 1052 0, 1053 0, 1054 0, 1055 MatTranspose_MPIDense, 1056 /* 15*/ MatGetInfo_MPIDense, 1057 MatEqual_MPIDense, 1058 MatGetDiagonal_MPIDense, 1059 MatDiagonalScale_MPIDense, 1060 MatNorm_MPIDense, 1061 /* 20*/ MatAssemblyBegin_MPIDense, 1062 MatAssemblyEnd_MPIDense, 1063 MatSetOption_MPIDense, 1064 MatZeroEntries_MPIDense, 1065 /* 24*/ MatZeroRows_MPIDense, 1066 0, 1067 0, 1068 0, 1069 0, 1070 /* 29*/ MatSetUp_MPIDense, 1071 0, 1072 0, 1073 MatGetDiagonalBlock_MPIDense, 1074 0, 1075 /* 34*/ MatDuplicate_MPIDense, 1076 0, 1077 0, 1078 0, 1079 0, 1080 /* 39*/ MatAXPY_MPIDense, 1081 MatCreateSubMatrices_MPIDense, 1082 0, 1083 MatGetValues_MPIDense, 1084 0, 1085 /* 44*/ 0, 1086 MatScale_MPIDense, 1087 MatShift_Basic, 1088 0, 1089 0, 1090 /* 49*/ MatSetRandom_MPIDense, 1091 0, 1092 0, 1093 0, 1094 0, 1095 /* 54*/ 0, 1096 0, 1097 0, 1098 0, 1099 0, 1100 /* 59*/ MatCreateSubMatrix_MPIDense, 1101 MatDestroy_MPIDense, 1102 MatView_MPIDense, 1103 0, 1104 0, 1105 /* 64*/ 0, 1106 0, 1107 0, 1108 0, 1109 0, 1110 /* 69*/ 0, 1111 0, 1112 0, 1113 0, 1114 0, 1115 /* 74*/ 0, 1116 0, 1117 0, 1118 0, 1119 0, 1120 /* 79*/ 0, 1121 0, 1122 0, 1123 0, 1124 /* 83*/ MatLoad_MPIDense, 1125 0, 1126 0, 1127 0, 1128 0, 1129 0, 1130 #if defined(PETSC_HAVE_ELEMENTAL) 1131 /* 89*/ MatMatMult_MPIDense_MPIDense, 1132 MatMatMultSymbolic_MPIDense_MPIDense, 1133 #else 1134 /* 89*/ 0, 1135 0, 1136 #endif 1137 MatMatMultNumeric_MPIDense, 1138 0, 1139 0, 1140 /* 94*/ 0, 1141 0, 1142 0, 1143 0, 1144 0, 1145 /* 99*/ 0, 1146 0, 1147 0, 1148 MatConjugate_MPIDense, 1149 0, 1150 /*104*/ 0, 1151 MatRealPart_MPIDense, 1152 MatImaginaryPart_MPIDense, 1153 0, 1154 0, 1155 /*109*/ 0, 1156 0, 1157 0, 1158 0, 1159 MatMissingDiagonal_MPIDense, 1160 /*114*/ 0, 1161 0, 1162 0, 1163 0, 1164 0, 1165 /*119*/ 0, 1166 0, 1167 0, 1168 0, 1169 0, 1170 /*124*/ 0, 1171 MatGetColumnNorms_MPIDense, 1172 0, 1173 0, 1174 0, 1175 /*129*/ 0, 1176 MatTransposeMatMult_MPIDense_MPIDense, 1177 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1178 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1179 0, 1180 /*134*/ 0, 1181 0, 1182 0, 1183 0, 1184 0, 1185 /*139*/ 0, 1186 0, 1187 0 1188 }; 1189 1190 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1191 { 1192 Mat_MPIDense *a; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 mat->preallocated = PETSC_TRUE; 1197 /* Note: For now, when data is specified above, this assumes the user correctly 1198 allocates the local dense storage space. We should add error checking. 
*/ 1199 1200 a = (Mat_MPIDense*)mat->data; 1201 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1202 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1203 a->nvec = mat->cmap->n; 1204 1205 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1206 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1207 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1208 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1209 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #if defined(PETSC_HAVE_ELEMENTAL) 1214 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1215 { 1216 Mat mat_elemental; 1217 PetscErrorCode ierr; 1218 PetscScalar *v; 1219 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1220 1221 PetscFunctionBegin; 1222 if (reuse == MAT_REUSE_MATRIX) { 1223 mat_elemental = *newmat; 1224 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1225 } else { 1226 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1227 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1228 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1229 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1230 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1231 } 1232 1233 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1234 for (i=0; i<N; i++) cols[i] = i; 1235 for (i=0; i<m; i++) rows[i] = rstart + i; 1236 1237 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1238 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1239 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1240 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1241 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1242 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1243 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1244 1245 if (reuse == MAT_INPLACE_MATRIX) { 1246 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1247 } else { 1248 *newmat = mat_elemental; 1249 } 1250 PetscFunctionReturn(0); 1251 } 1252 #endif 1253 1254 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1255 { 1256 Mat_MPIDense *a; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1261 mat->data = (void*)a; 1262 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1263 1264 mat->insertmode = NOT_SET_VALUES; 1265 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1266 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1267 1268 /* build cache for off array entries formed */ 1269 a->donotstash = PETSC_FALSE; 1270 1271 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1272 1273 /* stuff used for matrix vector multiply */ 1274 a->lvec = 0; 1275 a->Mvctx = 0; 1276 a->roworiented = PETSC_TRUE; 1277 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1280 #if defined(PETSC_HAVE_ELEMENTAL) 1281 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1282 #endif 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1287 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1291 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 /*MC 1296 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1297 1298 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1299 and MATMPIDENSE otherwise. 1300 1301 Options Database Keys: 1302 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1303 1304 Level: beginner 1305 1306 1307 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1308 M*/ 1309 1310 /*@C 1311 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1312 1313 Not collective 1314 1315 Input Parameters: 1316 . B - the matrix 1317 - data - optional location of matrix data. Set data=NULL for PETSc 1318 to control all matrix memory allocation. 1319 1320 Notes: 1321 The dense format is fully compatible with standard Fortran 77 1322 storage by columns. 1323 1324 The data input variable is intended primarily for Fortran programmers 1325 who wish to allocate their own matrix memory space. Most users should 1326 set data=NULL. 1327 1328 Level: intermediate 1329 1330 .keywords: matrix,dense, parallel 1331 1332 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1333 @*/ 1334 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1335 { 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 /*@C 1344 MatCreateDense - Creates a parallel matrix in dense format. 1345 1346 Collective on MPI_Comm 1347 1348 Input Parameters: 1349 + comm - MPI communicator 1350 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1351 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1352 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1353 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1354 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1355 to control all matrix memory allocation. 1356 1357 Output Parameter: 1358 . 
A - the matrix 1359 1360 Notes: 1361 The dense format is fully compatible with standard Fortran 77 1362 storage by columns. 1363 1364 The data input variable is intended primarily for Fortran programmers 1365 who wish to allocate their own matrix memory space. Most users should 1366 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1367 1368 The user MUST specify either the local or global matrix dimensions 1369 (possibly both). 1370 1371 Level: intermediate 1372 1373 .keywords: matrix,dense, parallel 1374 1375 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1376 @*/ 1377 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1378 { 1379 PetscErrorCode ierr; 1380 PetscMPIInt size; 1381 1382 PetscFunctionBegin; 1383 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1384 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1385 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1386 if (size > 1) { 1387 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1388 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1389 if (data) { /* user provided data array, so no need to assemble */ 1390 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1391 (*A)->assembled = PETSC_TRUE; 1392 } 1393 } else { 1394 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1395 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1396 } 1397 PetscFunctionReturn(0); 1398 } 1399 1400 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1401 { 1402 Mat mat; 1403 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1404 PetscErrorCode ierr; 1405 1406 PetscFunctionBegin; 1407 *newmat = 0; 1408 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1409 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1410 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1411 a = (Mat_MPIDense*)mat->data; 1412 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1413 1414 mat->factortype = A->factortype; 1415 mat->assembled = PETSC_TRUE; 1416 mat->preallocated = PETSC_TRUE; 1417 1418 a->size = oldmat->size; 1419 a->rank = oldmat->rank; 1420 mat->insertmode = NOT_SET_VALUES; 1421 a->nvec = oldmat->nvec; 1422 a->donotstash = oldmat->donotstash; 1423 1424 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1425 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1426 1427 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1428 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1429 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1430 1431 *newmat = mat; 1432 PetscFunctionReturn(0); 1433 } 1434 1435 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1436 { 1437 PetscErrorCode ierr; 1438 PetscMPIInt rank,size; 1439 const PetscInt *rowners; 1440 PetscInt i,m,n,nz,j,mMax; 1441 PetscScalar *array,*vals,*vals_ptr; 1442 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1443 1444 PetscFunctionBegin; 1445 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1446 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1447 1448 /* determine ownership of rows and columns */ 1449 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1450 n = (newmat->cmap->n < 0) ? 
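  /*
     Usage sketch (illustration only) for MatCreateDense() documented above: PETSc chooses the
     local sizes, the caller fills the locally owned rows and then assembles. M, N and the value
     formula are hypothetical.

         Mat      A;
         PetscInt i,j,rstart,rend;

         ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
         ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
         for (i=rstart; i<rend; i++) {
           for (j=0; j<N; j++) {
             PetscScalar val = 1.0/(i+j+1);
             ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
           }
         }
         ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
         ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  */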
PETSC_DECIDE : newmat->cmap->n; 1451 1452 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1453 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1454 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1455 } 1456 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1457 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1458 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1459 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1460 if (!rank) { 1461 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1462 1463 /* read in my part of the matrix numerical values */ 1464 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1465 1466 /* insert into matrix-by row (this is why cannot directly read into array */ 1467 vals_ptr = vals; 1468 for (i=0; i<m; i++) { 1469 for (j=0; j<N; j++) { 1470 array[i + j*m] = *vals_ptr++; 1471 } 1472 } 1473 1474 /* read in other processors and ship out */ 1475 for (i=1; i<size; i++) { 1476 nz = (rowners[i+1] - rowners[i])*N; 1477 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1478 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1479 } 1480 } else { 1481 /* receive numeric values */ 1482 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1483 1484 /* receive message of values*/ 1485 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1486 1487 /* insert into matrix-by row (this is why cannot directly read into array */ 1488 vals_ptr = vals; 1489 for (i=0; i<m; i++) { 1490 for (j=0; j<N; j++) { 1491 array[i + j*m] = *vals_ptr++; 1492 } 1493 } 1494 } 1495 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1496 ierr = PetscFree(vals);CHKERRQ(ierr); 1497 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1498 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1503 { 1504 Mat_MPIDense *a; 1505 PetscScalar *vals,*svals; 1506 MPI_Comm comm; 1507 MPI_Status status; 1508 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1509 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1510 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1511 PetscInt i,nz,j,rstart,rend; 1512 int fd; 1513 PetscErrorCode ierr; 1514 1515 PetscFunctionBegin; 1516 /* force binary viewer to load .info file if it has not yet done so */ 1517 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1518 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1519 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1520 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1521 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1522 if (!rank) { 1523 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1524 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1525 } 1526 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1527 M = header[1]; N = header[2]; nz = header[3]; 1528 1529 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1530 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1531 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1532 1533 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1534 if (newmat->cmap->N != N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1535 1536 /* 1537 Handle case where matrix is stored on disk as a dense matrix 1538 */ 1539 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1540 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 /* determine ownership of all rows */ 1545 if (newmat->rmap->n < 0) { 1546 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1547 } else { 1548 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1549 } 1550 if (newmat->cmap->n < 0) { 1551 n = PETSC_DECIDE; 1552 } else { 1553 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1554 } 1555 1556 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1557 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1558 rowners[0] = 0; 1559 for (i=2; i<=size; i++) { 1560 rowners[i] += rowners[i-1]; 1561 } 1562 rstart = rowners[rank]; 1563 rend = rowners[rank+1]; 1564 1565 /* distribute row lengths to all processors */ 1566 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1567 if (!rank) { 1568 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1569 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1570 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1571 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1572 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1573 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1574 } else { 1575 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1576 } 1577 1578 if (!rank) { 1579 /* calculate the number of nonzeros on each processor */ 1580 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1581 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1582 for (i=0; i<size; i++) { 1583 for (j=rowners[i]; j< rowners[i+1]; j++) { 1584 procsnz[i] += rowlengths[j]; 1585 } 1586 } 1587 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1588 1589 /* determine max buffer needed and allocate it */ 1590 maxnz = 0; 1591 for (i=0; i<size; i++) { 1592 maxnz = PetscMax(maxnz,procsnz[i]); 1593 } 1594 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1595 1596 /* read in my part of the matrix column indices */ 1597 nz = procsnz[0]; 1598 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1599 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1600 1601 /* read in every one elses and ship off */ 1602 for (i=1; i<size; i++) { 1603 nz = procsnz[i]; 1604 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1605 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1606 } 1607 ierr = PetscFree(cols);CHKERRQ(ierr); 1608 } else { 1609 /* determine buffer space needed for message */ 1610 nz = 0; 1611 for (i=0; i<m; i++) { 1612 nz += ourlens[i]; 1613 } 1614 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1615 1616 /* receive message of column indices*/ 1617 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1618 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1619 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1620 } 1621 1622 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1623 a = (Mat_MPIDense*)newmat->data; 1624 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1625 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1626 } 1627 1628 if 
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
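/*
   MatEqual_MPIDense() compares the local blocks with MatEqual() and then combines the
   per-rank results with a logical-AND reduction, so the answer is identical on every rank.
   A hedged usage sketch, assuming A and B are assembled MATMPIDENSE matrices with the same
   layout:

     PetscBool equal;
     ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);

   MatDestroy_MatTransMatMult_MPIDense_MPIDense() is installed as the destroy routine of the
   product matrix created by the transpose-matrix-product symbolic phase below; it frees the
   reusable buffers and then calls the original destroy routine that was saved in atb->destroy.
*/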
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reusing Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
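/*
   The three routines above implement C = A^T * B for two MATMPIDENSE matrices with the same
   row layout: each rank forms its local contribution A_loc^T * B_loc with one BLAS gemm, the
   contributions are rearranged so that each destination rank's rows are contiguous, and
   MPI_Reduce_scatter() sums and distributes them in a single step.

   A hedged usage sketch through the public interface, assuming A and B are assembled and
   share the same row distribution:

     Mat C;
     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/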
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c =(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure for reusing Cdense */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  ab->destroy               = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
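/*
   When PETSc is configured with Elemental, C = A*B for two MATMPIDENSE matrices is computed
   by converting both operands to MATELEMENTAL, multiplying there, and converting the result
   back to MATMPIDENSE; the Elemental copies Ae, Be, and Ce are cached in the product so a
   MAT_REUSE_MATRIX call only repeats the numeric work.

   A hedged usage sketch, assuming a PETSc build with Elemental enabled and assembled
   MATMPIDENSE operands:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/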