
/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that represents the operator; for a sequential matrix it returns the matrix itself.

   Input Parameter:
.  A - the sequential or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
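        /* Row-oriented input: the m x n values are stored by rows, so the entries for
           global row idxm[i] are contiguous starting at v+i*n and the whole row can be
           passed to the inner sequential matrix in a single call. */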
ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 106 } else { 107 for (j=0; j<n; j++) { 108 if (idxn[j] < 0) continue; 109 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 110 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 111 } 112 } 113 } else if (!A->donotstash) { 114 mat->assembled = PETSC_FALSE; 115 if (roworiented) { 116 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 117 } else { 118 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 119 } 120 } 121 } 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 126 { 127 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 128 PetscErrorCode ierr; 129 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 134 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 for (j=0; j<n; j++) { 138 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 139 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 141 } 142 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 143 } 144 PetscFunctionReturn(0); 145 } 146 147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 148 { 149 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 150 PetscErrorCode ierr; 151 152 PetscFunctionBegin; 153 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[]) 158 { 159 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 160 PetscErrorCode ierr; 161 162 PetscFunctionBegin; 163 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 167 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 168 { 169 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 170 PetscErrorCode ierr; 171 172 PetscFunctionBegin; 173 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 178 { 179 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 180 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 181 PetscErrorCode ierr; 182 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 183 const PetscInt *irow,*icol; 184 PetscScalar *av,*bv,*v = lmat->v; 185 Mat newmat; 186 IS iscol_local; 187 188 PetscFunctionBegin; 189 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 190 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 191 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 193 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 194 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 195 196 /* No parallel redistribution currently supported! Should really check each index set 197 to comfirm that it is OK. ... 
Currently supports only submatrix same partitioning as 198 original matrix! */ 199 200 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 201 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 202 203 /* Check submatrix call */ 204 if (scall == MAT_REUSE_MATRIX) { 205 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 206 /* Really need to test rows and column sizes! */ 207 newmat = *B; 208 } else { 209 /* Create and fill new matrix */ 210 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 211 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 212 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 213 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 214 } 215 216 /* Now extract the data pointers and do the copy, column at a time */ 217 newmatd = (Mat_MPIDense*)newmat->data; 218 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 219 220 for (i=0; i<Ncols; i++) { 221 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 222 for (j=0; j<nrows; j++) { 223 *bv++ = av[irow[j] - rstart]; 224 } 225 } 226 227 /* Assemble the matrices so that the correct flags are set */ 228 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 229 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 231 /* Free work space */ 232 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 233 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 234 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 235 *B = newmat; 236 PetscFunctionReturn(0); 237 } 238 239 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 240 { 241 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 242 PetscErrorCode ierr; 243 244 PetscFunctionBegin; 245 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 250 { 251 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 252 MPI_Comm comm; 253 PetscErrorCode ierr; 254 PetscInt nstash,reallocs; 255 InsertMode addv; 256 257 PetscFunctionBegin; 258 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 259 /* make sure all processors are either in INSERTMODE or ADDMODE */ 260 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 261 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 262 mat->insertmode = addv; /* in case this processor had no cache */ 263 264 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 265 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 266 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 271 { 272 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 273 PetscErrorCode ierr; 274 PetscInt i,*row,*col,flg,j,rstart,ncols; 275 PetscMPIInt n; 276 PetscScalar *val; 277 278 PetscFunctionBegin; 279 /* wait on receives */ 280 while (1) { 281 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 282 if (!flg) break; 283 284 for (i=0; i<n;) { 285 /* Now identify the consecutive vals belonging to the same row */ 286 for (j=i,rstart=row[j]; j<n; j++) { 287 if (row[j] != rstart) break; 288 } 289 if (j < n) ncols = j-i; 290 else ncols = 
n-i; 291 /* Now assemble all these values with a single function call */ 292 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 293 i = j; 294 } 295 } 296 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 297 298 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 299 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 300 301 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 302 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 303 } 304 PetscFunctionReturn(0); 305 } 306 307 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 308 { 309 PetscErrorCode ierr; 310 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 311 312 PetscFunctionBegin; 313 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 /* the code does not do the diagonal entries correctly unless the 318 matrix is square and the column and row owerships are identical. 319 This is a BUG. The only way to fix it seems to be to access 320 mdn->A and mdn->B directly and not through the MatZeroRows() 321 routine. 322 */ 323 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 324 { 325 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 326 PetscErrorCode ierr; 327 PetscInt i,*owners = A->rmap->range; 328 PetscInt *sizes,j,idx,nsends; 329 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 330 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 331 PetscInt *lens,*lrows,*values; 332 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 333 MPI_Comm comm; 334 MPI_Request *send_waits,*recv_waits; 335 MPI_Status recv_status,*send_status; 336 PetscBool found; 337 const PetscScalar *xx; 338 PetscScalar *bb; 339 340 PetscFunctionBegin; 341 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 342 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 343 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 344 /* first count number of contributors to each processor */ 345 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 346 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 347 for (i=0; i<N; i++) { 348 idx = rows[i]; 349 found = PETSC_FALSE; 350 for (j=0; j<size; j++) { 351 if (idx >= owners[j] && idx < owners[j+1]) { 352 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 353 } 354 } 355 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 356 } 357 nsends = 0; 358 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 359 360 /* inform other processors of number of messages and max length*/ 361 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 362 363 /* post receives: */ 364 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 365 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 366 for (i=0; i<nrecvs; i++) { 367 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 368 } 369 370 /* do sends: 371 1) starts[i] gives the starting index in svalues for stuff going to 372 the ith processor 373 */ 374 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 375 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 376 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 377 378 starts[0] = 0; 379 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 380 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 381 382 starts[0] = 0; 383 for (i=1; i<size+1; i++) 
starts[i] = starts[i-1] + sizes[2*i-2]; 384 count = 0; 385 for (i=0; i<size; i++) { 386 if (sizes[2*i+1]) { 387 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscFree(starts);CHKERRQ(ierr); 391 392 base = owners[rank]; 393 394 /* wait on receives */ 395 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 396 count = nrecvs; 397 slen = 0; 398 while (count) { 399 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 400 /* unpack receives into our local space */ 401 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 402 403 source[imdex] = recv_status.MPI_SOURCE; 404 lens[imdex] = n; 405 slen += n; 406 count--; 407 } 408 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 409 410 /* move the data into the send scatter */ 411 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 412 count = 0; 413 for (i=0; i<nrecvs; i++) { 414 values = rvalues + i*nmax; 415 for (j=0; j<lens[i]; j++) { 416 lrows[count++] = values[j] - base; 417 } 418 } 419 ierr = PetscFree(rvalues);CHKERRQ(ierr); 420 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 421 ierr = PetscFree(owner);CHKERRQ(ierr); 422 ierr = PetscFree(sizes);CHKERRQ(ierr); 423 424 /* fix right hand side if needed */ 425 if (x && b) { 426 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 427 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 428 for (i=0; i<slen; i++) { 429 bb[lrows[i]] = diag*xx[lrows[i]]; 430 } 431 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 432 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 433 } 434 435 /* actually zap the local rows */ 436 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 437 if (diag != 0.0) { 438 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 439 PetscInt m = ll->lda, i; 440 441 for (i=0; i<slen; i++) { 442 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 443 } 444 } 445 ierr = PetscFree(lrows);CHKERRQ(ierr); 446 447 /* wait on sends */ 448 if (nsends) { 449 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 450 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 451 ierr = PetscFree(send_status);CHKERRQ(ierr); 452 } 453 ierr = PetscFree(send_waits);CHKERRQ(ierr); 454 ierr = PetscFree(svalues);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 460 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 461 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 462 463 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 464 { 465 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 466 PetscErrorCode ierr; 467 468 PetscFunctionBegin; 469 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 476 { 477 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 478 PetscErrorCode ierr; 479 480 PetscFunctionBegin; 481 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 483 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 PetscErrorCode 
MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 488 { 489 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 490 PetscErrorCode ierr; 491 PetscScalar zero = 0.0; 492 493 PetscFunctionBegin; 494 ierr = VecSet(yy,zero);CHKERRQ(ierr); 495 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 496 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 502 { 503 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 508 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 509 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513 514 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 515 { 516 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 517 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 518 PetscErrorCode ierr; 519 PetscInt len,i,n,m = A->rmap->n,radd; 520 PetscScalar *x,zero = 0.0; 521 522 PetscFunctionBegin; 523 ierr = VecSet(v,zero);CHKERRQ(ierr); 524 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 525 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 526 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 527 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 528 radd = A->rmap->rstart*m; 529 for (i=0; i<len; i++) { 530 x[i] = aloc->v[radd + i*m + i]; 531 } 532 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 PetscErrorCode MatDestroy_MPIDense(Mat mat) 537 { 538 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 539 PetscErrorCode ierr; 540 541 PetscFunctionBegin; 542 #if defined(PETSC_USE_LOG) 543 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 544 #endif 545 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 546 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 547 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 548 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 549 550 ierr = PetscFree(mat->data);CHKERRQ(ierr); 551 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 552 553 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 554 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 555 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 556 557 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 558 #if defined(PETSC_HAVE_ELEMENTAL) 559 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 560 #endif 561 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 566 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 571 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 572 { 573 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 574 PetscErrorCode ierr; 575 PetscViewerFormat format; 576 int fd; 577 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 578 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 579 PetscScalar *work,*v,*vv; 580 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 581 582 PetscFunctionBegin; 583 if (mdn->size == 1) { 584 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 585 } else { 586 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 587 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 588 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 589 590 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 591 if (format == PETSC_VIEWER_NATIVE) { 592 593 if (!rank) { 594 /* store the matrix as a dense matrix */ 595 header[0] = MAT_FILE_CLASSID; 596 header[1] = mat->rmap->N; 597 header[2] = N; 598 header[3] = MATRIX_BINARY_FORMAT_DENSE; 599 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 600 601 /* get largest work array needed for transposing array */ 602 mmax = mat->rmap->n; 603 for (i=1; i<size; i++) { 604 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 605 } 606 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 607 608 /* write out local array, by rows */ 609 m = mat->rmap->n; 610 v = a->v; 611 for (j=0; j<N; j++) { 612 for (i=0; i<m; i++) { 613 work[j + i*N] = *v++; 614 } 615 } 616 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 617 /* get largest work array to receive messages from other processes, excludes process zero */ 618 mmax = 0; 619 for (i=1; i<size; i++) { 620 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 621 } 622 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 623 for (k = 1; k < size; k++) { 624 v = vv; 625 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 626 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 627 628 for (j = 0; j < N; j++) { 629 for (i = 0; i < m; i++) { 630 work[j + i*N] = *v++; 631 } 632 } 633 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 634 } 635 ierr = PetscFree(work);CHKERRQ(ierr); 636 ierr = PetscFree(vv);CHKERRQ(ierr); 637 } else { 638 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 639 } 640 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 641 } 642 PetscFunctionReturn(0); 643 } 644 645 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 646 #include <petscdraw.h> 647 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 648 { 649 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 650 PetscErrorCode ierr; 651 PetscMPIInt rank = mdn->rank; 652 PetscViewerType vtype; 653 PetscBool iascii,isdraw; 654 PetscViewer sviewer; 655 PetscViewerFormat format; 656 657 PetscFunctionBegin; 658 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 659 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 660 if (iascii) { 661 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 662 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 663 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 664 MatInfo info; 665 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 666 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 667 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 668 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 669 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 670 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } else if (format == PETSC_VIEWER_ASCII_INFO) { 673 PetscFunctionReturn(0); 674 } 675 } else if (isdraw) { 676 PetscDraw draw; 677 PetscBool isnull; 678 679 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 680 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 681 if (isnull) PetscFunctionReturn(0); 682 } 683 684 { 685 /* assemble the entire matrix onto first processor. */ 686 Mat A; 687 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 688 PetscInt *cols; 689 PetscScalar *vals; 690 691 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 692 if (!rank) { 693 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 694 } else { 695 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 696 } 697 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 698 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 699 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 700 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 701 702 /* Copy the matrix ... 
This isn't the most efficient means, 703 but it's quick for now */ 704 A->insertmode = INSERT_VALUES; 705 706 row = mat->rmap->rstart; 707 m = mdn->A->rmap->n; 708 for (i=0; i<m; i++) { 709 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 710 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 711 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 712 row++; 713 } 714 715 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 716 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 717 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 718 if (!rank) { 719 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 720 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 721 } 722 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 723 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 724 ierr = MatDestroy(&A);CHKERRQ(ierr); 725 } 726 PetscFunctionReturn(0); 727 } 728 729 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 730 { 731 PetscErrorCode ierr; 732 PetscBool iascii,isbinary,isdraw,issocket; 733 734 PetscFunctionBegin; 735 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 736 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 737 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 739 740 if (iascii || issocket || isdraw) { 741 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 742 } else if (isbinary) { 743 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 749 { 750 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 751 Mat mdn = mat->A; 752 PetscErrorCode ierr; 753 PetscReal isend[5],irecv[5]; 754 755 PetscFunctionBegin; 756 info->block_size = 1.0; 757 758 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 759 760 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 761 isend[3] = info->memory; isend[4] = info->mallocs; 762 if (flag == MAT_LOCAL) { 763 info->nz_used = isend[0]; 764 info->nz_allocated = isend[1]; 765 info->nz_unneeded = isend[2]; 766 info->memory = isend[3]; 767 info->mallocs = isend[4]; 768 } else if (flag == MAT_GLOBAL_MAX) { 769 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 770 771 info->nz_used = irecv[0]; 772 info->nz_allocated = irecv[1]; 773 info->nz_unneeded = irecv[2]; 774 info->memory = irecv[3]; 775 info->mallocs = irecv[4]; 776 } else if (flag == MAT_GLOBAL_SUM) { 777 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 778 779 info->nz_used = irecv[0]; 780 info->nz_allocated = irecv[1]; 781 info->nz_unneeded = irecv[2]; 782 info->memory = irecv[3]; 783 info->mallocs = irecv[4]; 784 } 785 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 786 info->fill_ratio_needed = 0; 787 info->factor_mallocs = 0; 788 PetscFunctionReturn(0); 789 } 790 791 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 792 { 793 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 794 PetscErrorCode ierr; 795 796 PetscFunctionBegin; 797 switch (op) { 
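  /* Each option is either forwarded to the inner sequential matrix, recorded in the
     parallel-level flags (row orientation, stashing of off-process entries), ignored
     because it does not apply to dense storage, or rejected as unknown. */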
798 case MAT_NEW_NONZERO_LOCATIONS: 799 case MAT_NEW_NONZERO_LOCATION_ERR: 800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 801 MatCheckPreallocated(A,1); 802 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 803 break; 804 case MAT_ROW_ORIENTED: 805 MatCheckPreallocated(A,1); 806 a->roworiented = flg; 807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 808 break; 809 case MAT_NEW_DIAGONALS: 810 case MAT_KEEP_NONZERO_PATTERN: 811 case MAT_USE_HASH_TABLE: 812 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 813 break; 814 case MAT_IGNORE_OFF_PROC_ENTRIES: 815 a->donotstash = flg; 816 break; 817 case MAT_SYMMETRIC: 818 case MAT_STRUCTURALLY_SYMMETRIC: 819 case MAT_HERMITIAN: 820 case MAT_SYMMETRY_ETERNAL: 821 case MAT_IGNORE_LOWER_TRIANGULAR: 822 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 823 break; 824 default: 825 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 826 } 827 PetscFunctionReturn(0); 828 } 829 830 831 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 832 { 833 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 834 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 835 const PetscScalar *l,*r; 836 PetscScalar x,*v; 837 PetscErrorCode ierr; 838 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 839 840 PetscFunctionBegin; 841 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 842 if (ll) { 843 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 844 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 845 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 846 for (i=0; i<m; i++) { 847 x = l[i]; 848 v = mat->v + i; 849 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 850 } 851 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 852 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 853 } 854 if (rr) { 855 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 856 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 857 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 858 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 860 for (i=0; i<n; i++) { 861 x = r[i]; 862 v = mat->v + i*m; 863 for (j=0; j<m; j++) (*v++) *= x; 864 } 865 ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 866 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 867 } 868 PetscFunctionReturn(0); 869 } 870 871 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 872 { 873 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 874 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 875 PetscErrorCode ierr; 876 PetscInt i,j; 877 PetscReal sum = 0.0; 878 PetscScalar *v = mat->v; 879 880 PetscFunctionBegin; 881 if (mdn->size == 1) { 882 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 883 } else { 884 if (type == NORM_FROBENIUS) { 885 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 886 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 887 } 888 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 889 *nrm = PetscSqrtReal(*nrm); 890 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 891 } else if (type == NORM_1) { 892 PetscReal *tmp,*tmp2; 893 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 894 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 895 ierr = 
PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 896 *nrm = 0.0; 897 v = mat->v; 898 for (j=0; j<mdn->A->cmap->n; j++) { 899 for (i=0; i<mdn->A->rmap->n; i++) { 900 tmp[j] += PetscAbsScalar(*v); v++; 901 } 902 } 903 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 904 for (j=0; j<A->cmap->N; j++) { 905 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 906 } 907 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 908 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 909 } else if (type == NORM_INFINITY) { /* max row norm */ 910 PetscReal ntemp; 911 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 912 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 913 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 914 } 915 PetscFunctionReturn(0); 916 } 917 918 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 919 { 920 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 921 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 922 Mat B; 923 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 924 PetscErrorCode ierr; 925 PetscInt j,i; 926 PetscScalar *v; 927 928 PetscFunctionBegin; 929 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 930 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 931 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 932 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 933 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 934 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 935 } else { 936 B = *matout; 937 } 938 939 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 940 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 941 for (i=0; i<m; i++) rwork[i] = rstart + i; 942 for (j=0; j<n; j++) { 943 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 944 v += m; 945 } 946 ierr = PetscFree(rwork);CHKERRQ(ierr); 947 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 948 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 949 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 950 *matout = B; 951 } else { 952 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 953 } 954 PetscFunctionReturn(0); 955 } 956 957 958 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 959 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 960 961 PetscErrorCode MatSetUp_MPIDense(Mat A) 962 { 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 971 { 972 PetscErrorCode ierr; 973 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 974 975 PetscFunctionBegin; 976 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 977 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatConjugate_MPIDense(Mat mat) 982 { 983 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 984 PetscErrorCode ierr; 985 986 PetscFunctionBegin; 987 ierr = MatConjugate(a->A);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatRealPart_MPIDense(Mat A) 992 { 993 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1002 { 1003 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1012 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1013 { 1014 PetscErrorCode ierr; 1015 PetscInt i,n; 1016 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1017 PetscReal *work; 1018 1019 PetscFunctionBegin; 1020 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1021 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1022 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1023 if (type == NORM_2) { 1024 for (i=0; i<n; i++) work[i] *= work[i]; 1025 } 1026 if (type == NORM_INFINITY) { 1027 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1028 } else { 1029 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1030 } 1031 ierr = PetscFree(work);CHKERRQ(ierr); 1032 if (type == NORM_2) { 1033 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1034 } 1035 PetscFunctionReturn(0); 1036 } 1037 1038 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1039 { 1040 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1041 PetscErrorCode ierr; 1042 PetscScalar *a; 1043 PetscInt m,n,i; 1044 1045 PetscFunctionBegin; 1046 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1047 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1048 for (i=0; i<m*n; i++) { 1049 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1050 } 1051 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1056 1057 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1058 { 1059 PetscFunctionBegin; 1060 *missing = PETSC_FALSE; 1061 PetscFunctionReturn(0); 1062 } 1063 1064 /* -------------------------------------------------------------------*/ 1065 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1066 MatGetRow_MPIDense, 1067 MatRestoreRow_MPIDense, 1068 MatMult_MPIDense, 1069 /* 4*/ MatMultAdd_MPIDense, 1070 MatMultTranspose_MPIDense, 1071 MatMultTransposeAdd_MPIDense, 1072 0, 1073 0, 1074 0, 1075 /* 10*/ 0, 1076 0, 1077 0, 1078 0, 1079 MatTranspose_MPIDense, 1080 /* 15*/ MatGetInfo_MPIDense, 1081 MatEqual_MPIDense, 1082 MatGetDiagonal_MPIDense, 1083 MatDiagonalScale_MPIDense, 1084 MatNorm_MPIDense, 1085 /* 20*/ MatAssemblyBegin_MPIDense, 1086 MatAssemblyEnd_MPIDense, 1087 MatSetOption_MPIDense, 1088 MatZeroEntries_MPIDense, 1089 /* 24*/ MatZeroRows_MPIDense, 1090 0, 1091 0, 1092 0, 1093 0, 1094 /* 29*/ MatSetUp_MPIDense, 1095 0, 1096 0, 1097 MatGetDiagonalBlock_MPIDense, 1098 0, 1099 /* 34*/ MatDuplicate_MPIDense, 1100 0, 1101 0, 1102 0, 1103 0, 1104 /* 39*/ MatAXPY_MPIDense, 1105 MatCreateSubMatrices_MPIDense, 1106 0, 1107 MatGetValues_MPIDense, 1108 0, 1109 /* 44*/ 0, 1110 MatScale_MPIDense, 1111 MatShift_Basic, 1112 0, 1113 0, 1114 /* 49*/ MatSetRandom_MPIDense, 1115 0, 1116 0, 1117 0, 1118 0, 1119 /* 54*/ 0, 1120 0, 1121 0, 1122 0, 1123 0, 1124 /* 59*/ MatCreateSubMatrix_MPIDense, 1125 MatDestroy_MPIDense, 1126 MatView_MPIDense, 1127 0, 1128 0, 1129 /* 64*/ 0, 1130 0, 1131 0, 1132 0, 1133 0, 1134 /* 69*/ 0, 1135 0, 1136 0, 1137 0, 1138 0, 1139 /* 74*/ 0, 1140 0, 1141 0, 1142 0, 1143 0, 
1144 /* 79*/ 0, 1145 0, 1146 0, 1147 0, 1148 /* 83*/ MatLoad_MPIDense, 1149 0, 1150 0, 1151 0, 1152 0, 1153 0, 1154 #if defined(PETSC_HAVE_ELEMENTAL) 1155 /* 89*/ MatMatMult_MPIDense_MPIDense, 1156 MatMatMultSymbolic_MPIDense_MPIDense, 1157 #else 1158 /* 89*/ 0, 1159 0, 1160 #endif 1161 MatMatMultNumeric_MPIDense, 1162 0, 1163 0, 1164 /* 94*/ 0, 1165 0, 1166 0, 1167 0, 1168 0, 1169 /* 99*/ 0, 1170 0, 1171 0, 1172 MatConjugate_MPIDense, 1173 0, 1174 /*104*/ 0, 1175 MatRealPart_MPIDense, 1176 MatImaginaryPart_MPIDense, 1177 0, 1178 0, 1179 /*109*/ 0, 1180 0, 1181 0, 1182 0, 1183 MatMissingDiagonal_MPIDense, 1184 /*114*/ 0, 1185 0, 1186 0, 1187 0, 1188 0, 1189 /*119*/ 0, 1190 0, 1191 0, 1192 0, 1193 0, 1194 /*124*/ 0, 1195 MatGetColumnNorms_MPIDense, 1196 0, 1197 0, 1198 0, 1199 /*129*/ 0, 1200 MatTransposeMatMult_MPIDense_MPIDense, 1201 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1202 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1203 0, 1204 /*134*/ 0, 1205 0, 1206 0, 1207 0, 1208 0, 1209 /*139*/ 0, 1210 0, 1211 0 1212 }; 1213 1214 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1215 { 1216 Mat_MPIDense *a; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 mat->preallocated = PETSC_TRUE; 1221 /* Note: For now, when data is specified above, this assumes the user correctly 1222 allocates the local dense storage space. We should add error checking. */ 1223 1224 a = (Mat_MPIDense*)mat->data; 1225 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1226 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1227 a->nvec = mat->cmap->n; 1228 1229 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1230 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1231 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1232 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1233 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #if defined(PETSC_HAVE_ELEMENTAL) 1238 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1239 { 1240 Mat mat_elemental; 1241 PetscErrorCode ierr; 1242 PetscScalar *v; 1243 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1244 1245 PetscFunctionBegin; 1246 if (reuse == MAT_REUSE_MATRIX) { 1247 mat_elemental = *newmat; 1248 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1249 } else { 1250 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1251 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1252 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1253 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1254 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1255 } 1256 1257 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1258 for (i=0; i<N; i++) cols[i] = i; 1259 for (i=0; i<m; i++) rows[i] = rstart + i; 1260 1261 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1262 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1263 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1264 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1265 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1266 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1267 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1268 1269 if (reuse == 
MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-process array entries */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
         to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user
   is responsible for freeing this array; it will not be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
         to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
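
   For example, a minimal sketch of creating, filling, and assembling a parallel dense
   matrix with PETSc-managed storage (illustrative sizes, error checking omitted):

      Mat         A;
      PetscInt    i,j,rstart,rend;
      PetscScalar v;

      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {
        for (j=0; j<100; j++) {
          v = 1.0/(i+j+1);
          MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
        }
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(&A);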
1443 1444 The data input variable is intended primarily for Fortran programmers 1445 who wish to allocate their own matrix memory space. Most users should 1446 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1447 1448 The user MUST specify either the local or global matrix dimensions 1449 (possibly both). 1450 1451 Level: intermediate 1452 1453 .keywords: matrix,dense, parallel 1454 1455 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1456 @*/ 1457 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1458 { 1459 PetscErrorCode ierr; 1460 PetscMPIInt size; 1461 1462 PetscFunctionBegin; 1463 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1464 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1465 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1466 if (size > 1) { 1467 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1468 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1469 if (data) { /* user provided data array, so no need to assemble */ 1470 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1471 (*A)->assembled = PETSC_TRUE; 1472 } 1473 } else { 1474 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1475 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1481 { 1482 Mat mat; 1483 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1484 PetscErrorCode ierr; 1485 1486 PetscFunctionBegin; 1487 *newmat = 0; 1488 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1489 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1490 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1491 a = (Mat_MPIDense*)mat->data; 1492 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1493 1494 mat->factortype = A->factortype; 1495 mat->assembled = PETSC_TRUE; 1496 mat->preallocated = PETSC_TRUE; 1497 1498 a->size = oldmat->size; 1499 a->rank = oldmat->rank; 1500 mat->insertmode = NOT_SET_VALUES; 1501 a->nvec = oldmat->nvec; 1502 a->donotstash = oldmat->donotstash; 1503 1504 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1505 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1506 1507 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1508 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1509 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1510 1511 *newmat = mat; 1512 PetscFunctionReturn(0); 1513 } 1514 1515 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1516 { 1517 PetscErrorCode ierr; 1518 PetscMPIInt rank,size; 1519 const PetscInt *rowners; 1520 PetscInt i,m,n,nz,j,mMax; 1521 PetscScalar *array,*vals,*vals_ptr; 1522 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1523 1524 PetscFunctionBegin; 1525 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1527 1528 /* determine ownership of rows and columns */ 1529 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1530 n = (newmat->cmap->n < 0) ? 
PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the matrix array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts owned by the other processes and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the matrix array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If the global rows/cols are set to PETSC_DECIDE, use the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N)
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts for the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
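/*
   Illustrative usage sketch (not part of the original source, guarded out of
   compilation): how an application might drive the loader above through the public
   MatLoad() interface.  The file name "matrix.dat" and the helper name
   ExampleLoadDense are hypothetical.
*/
#if 0
static PetscErrorCode ExampleLoadDense(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr); /* resolves to MATMPIDENSE on more than one process */
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);      /* dispatches to MatLoad_MPIDense() in parallel */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif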
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute the local contribution atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf, ordered by the owning process of each row block of C */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the atbarray contributions from all processes into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure that allows Cdense to be reused */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)Cdense->data;
  c->atbdense  = atb;
  atb->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
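/*
   Illustrative usage sketch (not part of the original source, guarded out of
   compilation): computing C = A^T * B for two assembled MATMPIDENSE matrices with
   identical row layouts through the public MatTransposeMatMult() interface, which
   dispatches to the routines above.  The helper name ExampleTransposeMatMultDense
   is hypothetical.
*/
#if 0
static PetscErrorCode ExampleTransposeMatMultDense(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr); /* symbolic + numeric */
  ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);   /* reuse C after A or B changes */
  PetscFunctionReturn(0);
}
#endif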
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c =(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure that allows Ae, Be, and Ce to be reused */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
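/*
   Illustrative usage sketch (not part of the original source, guarded out of
   compilation): forming C = A * B for two assembled MATMPIDENSE matrices through
   the public MatMatMult() interface.  As the PETSC_HAVE_ELEMENTAL guard above
   indicates, this path requires PETSc to be configured with Elemental.  The helper
   name ExampleMatMultDense is hypothetical.
*/
#if 0
static PetscErrorCode ExampleMatMultDense(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr); /* symbolic + numeric product */
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);   /* recompute C after A or B changes */
  PetscFunctionReturn(0);
}
#endif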