
/*
   Basic functions for basic parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
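/*
   A minimal usage sketch (illustrative only, not part of the implementation): MatDenseGetLocalMatrix()
   lets a caller operate directly on the per-process dense block, regardless of whether the matrix is
   MATSEQDENSE or MATMPIDENSE. Variable names are hypothetical.

     Mat A,Aloc;
     ...                                          // create A as a dense matrix and assemble it
     MatDenseGetLocalMatrix(A,&Aloc);             // Aloc is the local MATSEQDENSE block (or A itself)
     MatScale(Aloc,2.0);                          // any sequential dense operation may be applied

   Since the routine above returns the inner matrix without taking a new reference, the caller should
   not destroy Aloc.
*/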
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar *array;
  MPI_Comm comm;
  Mat B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
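/*
   A usage sketch (illustrative only): MatSetValues_MPIDense() above interprets the value array as
   row-major by default; setting MAT_ROW_ORIENTED to PETSC_FALSE makes it treat the array as
   column-major, which matches the internal Fortran-style column storage. Variable names are
   hypothetical.

     PetscScalar vals[4] = {1,3,2,4};             // column-major 2x2 block: [1 2; 3 4]
     PetscInt rows[2],cols[2];
     ...                                          // fill rows/cols with locally owned indices
     MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);
     MatSetValues(A,2,rows,2,cols,vals,INSERT_VALUES);
     MatSetOption(A,MAT_ROW_ORIENTED,PETSC_TRUE); // restore the default row-major interpretation
*/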
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar *av,*bv,*v = lmat->v;
  Mat newmat;
  IS iscol_local;
  MPI_Comm comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix with the same partitioning as
     the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm comm;
  PetscErrorCode ierr;
  PetscInt nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
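/*
   A usage sketch (illustrative only): values set into rows owned by another process are cached in
   mat->stash by MatSetValues_MPIDense() and are communicated only during assembly, so any sequence
   of calls that sets off-process entries must end with the two assembly calls. Variable names are
   hypothetical.

     PetscInt row = 0,col = 0;                    // row may be owned by another process
     PetscScalar val = 1.0;
     MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);      // scatters the stash (MatAssemblyBegin_MPIDense above)
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);        // drains the stash (MatAssemblyEnd_MPIDense below)
*/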
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt n;
  PetscScalar *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt i,*owners = A->rmap->range;
  PetscInt *sizes,j,idx,nsends;
  PetscInt nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt *lens,*lrows,*values;
  PetscMPIInt n,imdex,rank = l->rank,size = l->size;
  MPI_Comm comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status recv_status,*send_status;
  PetscBool found;
  const PetscScalar *xx;
  PetscScalar *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
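/*
   A usage sketch (illustrative only): MatZeroRows() on a MATMPIDENSE matrix accepts global row
   indices owned by any process; the routine above ships each index to its owner. If a solution and
   right-hand-side vector are supplied, each zeroed row i gets b[i] = diag*x[i] so the system stays
   consistent with the supplied solution. Variable names are hypothetical.

     PetscInt rows[2] = {0,5};                    // global row numbers, possibly off-process
     MatZeroRows(A,2,rows,1.0,NULL,NULL);         // zero the rows, put 1.0 on the diagonal
     // or, to keep a linear system consistent with a known solution vector x:
     // MatZeroRows(A,2,rows,1.0,x,b);
*/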
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt len,i,n,m = A->rmap->n,radd;
  PetscScalar *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscViewerFormat format;
  int fd;
  PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar *work,*v,*vv;
  Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v = vv;
          m = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
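/*
   A usage sketch (illustrative only): as the error message above indicates, a parallel dense matrix
   can only be written to a binary viewer in the native format, so that format must be pushed before
   MatView(). The file name is hypothetical.

     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);
     PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);
     MatView(A,viewer);                           // dispatches to MatView_MPIDense_Binary above
     PetscViewerPopFormat(viewer);
     PetscViewerDestroy(&viewer);
*/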
extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt rank = mdn->rank;
  PetscViewerType vtype;
  PetscBool iascii,isdraw;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat A;
    PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar x,*v;
  PetscErrorCode ierr;
  PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt i,j;
  PetscReal sum = 0.0;
  PetscScalar *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat B;
  PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt j,i;
  PetscScalar *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt i,n;
  Mat_MPIDense *a = (Mat_MPIDense*) A->data;
  PetscReal *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar *a;
  PetscInt m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking.
  */

  a = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat mat_elemental;
  PetscErrorCode ierr;
  PetscScalar *v;
  PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
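/*
   A usage sketch (illustrative only, requires a PETSc build with Elemental): the conversion above is
   reached through the generic MatConvert() interface, either into a new matrix or in place.

     Mat Ae;
     MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);   // new Elemental copy of the MPIDENSE matrix A
     // or: MatConvert(A,MATELEMENTAL,MAT_INPLACE_MATRIX,&A);
*/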
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense *mat;
  PetscInt m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
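/*
   A usage sketch (illustrative only): MatCreate_MPIDense() above is normally reached through the
   standard object-creation sequence rather than being called directly. M and N are assumed global
   sizes.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,NULL);         // let PETSc allocate the local storage
*/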
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
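/*
   A usage sketch (illustrative only): a user-owned, column-major buffer of the correct local size can
   temporarily replace the matrix storage, for example to wrap existing data without a copy. The
   buffer name is hypothetical.

     PetscScalar *mydata;                         // assumed length at least (local rows)*(global cols)
     ...
     MatDensePlaceArray(A,mydata);                // A now operates directly on mydata
     ...                                          // use A: MatMult(), MatView(), etc.
     MatDenseResetArray(A);                       // restore the original storage; mydata remains owned by the user
*/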
/*@
   MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
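/*
   A usage sketch (illustrative only): the most common way to obtain a parallel dense matrix is the
   convenience routine documented above; the sizes here are hypothetical.

     Mat A;
     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A);
     ...                                          // insert entries with MatSetValues(), then assemble
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/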
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the column-major array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts belonging to the other processes and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the column-major array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
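/*
   Worked example of the reordering above (illustrative values only): for a 2 x 3
   local block the file stores the entries row by row,

     vals  = [a00 a01 a02 a10 a11 a12]

   while the local dense array is stored by columns (Fortran order), so the copy
   loop produces

     array = [a00 a10 a01 a11 a02 a12]      i.e. array[i + j*m] = vals[i*N + j]
*/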
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscBool      isbinary;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);

  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];
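  /* For example (illustrative values only): with size = 3 and local row counts
     m = 4, 3, 3 the allgather and prefix sum above give rowners = {0,4,7,10},
     so rank 1 owns global rows rstart = 4 up to (but not including) rend = 7. */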
  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  if (!a->A) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processes' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
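/*
   Illustrative sketch (the file name "matrix.dat" is a placeholder): loading a
   parallel dense matrix from a PETSc binary file, which ends up in the routine
   above (or in MatLoad_MPIDense_DenseInFile() when the file uses the dense
   storage format).

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/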
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute the local contribution atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf, ordered by the owning process of each row block of C */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the per-process contributions into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
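/*
   Illustrative sketch (A and B are assumed to be already assembled MATMPIDENSE
   matrices with identical row layouts): forming C = A^T * B, which dispatches to
   the symbolic and numeric routines in this file.

     Mat C;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... change the entries of A or B ...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/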
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse of Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)Cdense->data;
  c->atbdense  = atb;
  atb->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  const char            *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt              alg, nalg = 2;
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  alg  = 0; /* default is allgatherv */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (A->cmap->N != B->cmap->N) {
    SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse of Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* cyclic */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* allgatherv */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c                    = (Mat_MPIDense*)Cdense->data;
  c->abtdense          = abt;
  abt->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
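/*
   Illustrative sketch (A and B are assumed to be assembled MATMPIDENSE matrices
   with the same global number of columns): forming C = A * B^T, with the
   communication algorithm selectable from the options database.

     Mat C;

     ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   Run-time selection of the cyclic algorithm (the default is allgatherv):

     -matmattransmult_mpidense_mpidense_via cyclic
*/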
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *carray;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (bseq->lda == bn) {
    sendbuf = bseq->v;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending; the buffers can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr    = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr    = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr   = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscFunctionReturn(0);
}
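/*
   Worked example of the ring schedule above (illustrative only): with size = 4,
   rank 1 sends to rank 0 and receives from rank 2, and it processes the column
   blocks of C owned by ranks 1, 2, 3, 0 in that order; at each step the block
   that was just received becomes the next block to be forwarded.
*/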
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size;
  PetscScalar           *sendbuf, *recvbuf;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bseq->v[i * bseq->lda + j];
    }
  }
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure for reuse of C */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
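/*
   Illustrative sketch (requires a PETSc build configured with Elemental, since the
   MPIDENSE * MPIDENSE product above is implemented by converting to MATELEMENTAL;
   A and B are assumed to be assembled MATMPIDENSE matrices with compatible sizes):

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/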