/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
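/*
   Usage sketch (illustrative only, not a PETSc man page): MatDenseGetLocalMatrix(),
   documented above, returns the local MATSEQDENSE block of a MATMPIDENSE matrix
   (or the matrix itself when it is sequential), which gives direct read access to the
   raw column-major storage. The matrix A is assumed to be an assembled dense matrix;
   lA, lv and lda are names used only in this sketch.

     Mat               lA;
     const PetscScalar *lv;
     ierr = MatDenseGetLocalMatrix(A,&lA);CHKERRQ(ierr);
     ierr = MatDenseGetArrayRead(lA,&lv);CHKERRQ(ierr);
     ... lv[i + j*lda] is local row i of global column j; for matrices allocated by
         MatMPIDenseSetPreallocation() the leading dimension lda equals the number of
         local rows ...
     ierr = MatDenseRestoreArrayRead(lA,&lv);CHKERRQ(ierr);
*/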
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrices with the same partitioning
     as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
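/*
   Assembly sketch (illustrative only): MatSetValues_MPIDense() above caches values destined
   for rows owned by other processes in mat->stash; they are communicated and inserted only by
   the MatAssemblyBegin/End pair implemented below. A caller therefore always brackets its
   insertions as follows (the indices row, col and the value "one" are placeholders and may
   refer to rows owned by any process):

     PetscScalar one = 1.0;
     PetscInt    row = 0, col = 0;
     ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/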
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
347 */ 348 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 349 { 350 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 351 PetscErrorCode ierr; 352 PetscInt i,*owners = A->rmap->range; 353 PetscInt *sizes,j,idx,nsends; 354 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 355 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 356 PetscInt *lens,*lrows,*values; 357 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 358 MPI_Comm comm; 359 MPI_Request *send_waits,*recv_waits; 360 MPI_Status recv_status,*send_status; 361 PetscBool found; 362 const PetscScalar *xx; 363 PetscScalar *bb; 364 365 PetscFunctionBegin; 366 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 367 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 368 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 369 /* first count number of contributors to each processor */ 370 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 371 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 372 for (i=0; i<N; i++) { 373 idx = rows[i]; 374 found = PETSC_FALSE; 375 for (j=0; j<size; j++) { 376 if (idx >= owners[j] && idx < owners[j+1]) { 377 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 378 } 379 } 380 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 381 } 382 nsends = 0; 383 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 384 385 /* inform other processors of number of messages and max length*/ 386 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 387 388 /* post receives: */ 389 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 390 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 391 for (i=0; i<nrecvs; i++) { 392 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 393 } 394 395 /* do sends: 396 1) starts[i] gives the starting index in svalues for stuff going to 397 the ith processor 398 */ 399 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 400 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 401 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 402 403 starts[0] = 0; 404 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 405 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 406 407 starts[0] = 0; 408 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 409 count = 0; 410 for (i=0; i<size; i++) { 411 if (sizes[2*i+1]) { 412 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 413 } 414 } 415 ierr = PetscFree(starts);CHKERRQ(ierr); 416 417 base = owners[rank]; 418 419 /* wait on receives */ 420 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 421 count = nrecvs; 422 slen = 0; 423 while (count) { 424 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 425 /* unpack receives into our local space */ 426 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 427 428 source[imdex] = recv_status.MPI_SOURCE; 429 lens[imdex] = n; 430 slen += n; 431 count--; 432 } 433 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 434 435 /* move the data into the send scatter */ 436 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 437 count = 0; 438 for (i=0; i<nrecvs; i++) { 439 values = rvalues + i*nmax; 440 for (j=0; j<lens[i]; j++) { 441 lrows[count++] = values[j] - base; 442 } 443 } 444 
ierr = PetscFree(rvalues);CHKERRQ(ierr); 445 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 446 ierr = PetscFree(owner);CHKERRQ(ierr); 447 ierr = PetscFree(sizes);CHKERRQ(ierr); 448 449 /* fix right hand side if needed */ 450 if (x && b) { 451 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 452 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 453 for (i=0; i<slen; i++) { 454 bb[lrows[i]] = diag*xx[lrows[i]]; 455 } 456 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 457 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 458 } 459 460 /* actually zap the local rows */ 461 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 462 if (diag != 0.0) { 463 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 464 PetscInt m = ll->lda, i; 465 466 for (i=0; i<slen; i++) { 467 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 468 } 469 } 470 ierr = PetscFree(lrows);CHKERRQ(ierr); 471 472 /* wait on sends */ 473 if (nsends) { 474 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 475 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 476 ierr = PetscFree(send_status);CHKERRQ(ierr); 477 } 478 ierr = PetscFree(send_waits);CHKERRQ(ierr); 479 ierr = PetscFree(svalues);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 484 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 485 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 486 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 487 488 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 489 { 490 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 491 PetscErrorCode ierr; 492 493 PetscFunctionBegin; 494 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 495 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 501 { 502 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 503 PetscErrorCode ierr; 504 505 PetscFunctionBegin; 506 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 507 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 508 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 513 { 514 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 515 PetscErrorCode ierr; 516 PetscScalar zero = 0.0; 517 518 PetscFunctionBegin; 519 ierr = VecSet(yy,zero);CHKERRQ(ierr); 520 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 522 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 527 { 528 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 529 PetscErrorCode ierr; 530 531 PetscFunctionBegin; 532 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 533 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 534 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 535 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 540 { 
541 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 542 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 543 PetscErrorCode ierr; 544 PetscInt len,i,n,m = A->rmap->n,radd; 545 PetscScalar *x,zero = 0.0; 546 547 PetscFunctionBegin; 548 ierr = VecSet(v,zero);CHKERRQ(ierr); 549 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 550 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 551 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 552 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 553 radd = A->rmap->rstart*m; 554 for (i=0; i<len; i++) { 555 x[i] = aloc->v[radd + i*m + i]; 556 } 557 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 PetscErrorCode MatDestroy_MPIDense(Mat mat) 562 { 563 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 564 PetscErrorCode ierr; 565 566 PetscFunctionBegin; 567 #if defined(PETSC_USE_LOG) 568 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 569 #endif 570 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 571 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 572 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 573 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 574 575 ierr = PetscFree(mat->data);CHKERRQ(ierr); 576 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 577 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 579 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 584 #if defined(PETSC_HAVE_ELEMENTAL) 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 586 #endif 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 592 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 593 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 594 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 595 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 596 PetscFunctionReturn(0); 597 } 598 599 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 600 { 601 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 602 PetscErrorCode ierr; 603 PetscViewerFormat format; 604 int fd; 605 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 606 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 607 PetscScalar *work,*v,*vv; 608 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 609 610 PetscFunctionBegin; 611 if 
(mdn->size == 1) { 612 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 613 } else { 614 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 615 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 616 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 617 618 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 619 if (format == PETSC_VIEWER_NATIVE) { 620 621 if (!rank) { 622 /* store the matrix as a dense matrix */ 623 header[0] = MAT_FILE_CLASSID; 624 header[1] = mat->rmap->N; 625 header[2] = N; 626 header[3] = MATRIX_BINARY_FORMAT_DENSE; 627 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 628 629 /* get largest work array needed for transposing array */ 630 mmax = mat->rmap->n; 631 for (i=1; i<size; i++) { 632 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 633 } 634 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 635 636 /* write out local array, by rows */ 637 m = mat->rmap->n; 638 v = a->v; 639 for (j=0; j<N; j++) { 640 for (i=0; i<m; i++) { 641 work[j + i*N] = *v++; 642 } 643 } 644 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 645 /* get largest work array to receive messages from other processes, excludes process zero */ 646 mmax = 0; 647 for (i=1; i<size; i++) { 648 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 649 } 650 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 651 for (k = 1; k < size; k++) { 652 v = vv; 653 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 654 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 655 656 for (j = 0; j < N; j++) { 657 for (i = 0; i < m; i++) { 658 work[j + i*N] = *v++; 659 } 660 } 661 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 662 } 663 ierr = PetscFree(work);CHKERRQ(ierr); 664 ierr = PetscFree(vv);CHKERRQ(ierr); 665 } else { 666 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 667 } 668 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 669 } 670 PetscFunctionReturn(0); 671 } 672 673 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 674 #include <petscdraw.h> 675 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 676 { 677 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 678 PetscErrorCode ierr; 679 PetscMPIInt rank = mdn->rank; 680 PetscViewerType vtype; 681 PetscBool iascii,isdraw; 682 PetscViewer sviewer; 683 PetscViewerFormat format; 684 685 PetscFunctionBegin; 686 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 687 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 688 if (iascii) { 689 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 690 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 691 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 692 MatInfo info; 693 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 694 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 695 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 696 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 697 ierr = 
PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 698 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } else if (format == PETSC_VIEWER_ASCII_INFO) { 701 PetscFunctionReturn(0); 702 } 703 } else if (isdraw) { 704 PetscDraw draw; 705 PetscBool isnull; 706 707 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 708 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 709 if (isnull) PetscFunctionReturn(0); 710 } 711 712 { 713 /* assemble the entire matrix onto first processor. */ 714 Mat A; 715 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 716 PetscInt *cols; 717 PetscScalar *vals; 718 719 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 720 if (!rank) { 721 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 722 } else { 723 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 724 } 725 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 726 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 727 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 728 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 729 730 /* Copy the matrix ... This isn't the most efficient means, 731 but it's quick for now */ 732 A->insertmode = INSERT_VALUES; 733 734 row = mat->rmap->rstart; 735 m = mdn->A->rmap->n; 736 for (i=0; i<m; i++) { 737 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 738 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 739 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 740 row++; 741 } 742 743 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 744 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 746 if (!rank) { 747 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 748 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 749 } 750 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 751 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 752 ierr = MatDestroy(&A);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 758 { 759 PetscErrorCode ierr; 760 PetscBool iascii,isbinary,isdraw,issocket; 761 762 PetscFunctionBegin; 763 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 764 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 765 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 766 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 767 768 if (iascii || issocket || isdraw) { 769 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 770 } else if (isbinary) { 771 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 772 } 773 PetscFunctionReturn(0); 774 } 775 776 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 777 { 778 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 779 Mat mdn = mat->A; 780 PetscErrorCode ierr; 781 PetscReal isend[5],irecv[5]; 782 783 PetscFunctionBegin; 784 info->block_size = 1.0; 785 786 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 787 788 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 789 isend[3] = 
info->memory; isend[4] = info->mallocs; 790 if (flag == MAT_LOCAL) { 791 info->nz_used = isend[0]; 792 info->nz_allocated = isend[1]; 793 info->nz_unneeded = isend[2]; 794 info->memory = isend[3]; 795 info->mallocs = isend[4]; 796 } else if (flag == MAT_GLOBAL_MAX) { 797 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 798 799 info->nz_used = irecv[0]; 800 info->nz_allocated = irecv[1]; 801 info->nz_unneeded = irecv[2]; 802 info->memory = irecv[3]; 803 info->mallocs = irecv[4]; 804 } else if (flag == MAT_GLOBAL_SUM) { 805 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 806 807 info->nz_used = irecv[0]; 808 info->nz_allocated = irecv[1]; 809 info->nz_unneeded = irecv[2]; 810 info->memory = irecv[3]; 811 info->mallocs = irecv[4]; 812 } 813 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 814 info->fill_ratio_needed = 0; 815 info->factor_mallocs = 0; 816 PetscFunctionReturn(0); 817 } 818 819 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 820 { 821 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 822 PetscErrorCode ierr; 823 824 PetscFunctionBegin; 825 switch (op) { 826 case MAT_NEW_NONZERO_LOCATIONS: 827 case MAT_NEW_NONZERO_LOCATION_ERR: 828 case MAT_NEW_NONZERO_ALLOCATION_ERR: 829 MatCheckPreallocated(A,1); 830 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 831 break; 832 case MAT_ROW_ORIENTED: 833 MatCheckPreallocated(A,1); 834 a->roworiented = flg; 835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 836 break; 837 case MAT_NEW_DIAGONALS: 838 case MAT_KEEP_NONZERO_PATTERN: 839 case MAT_USE_HASH_TABLE: 840 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 841 break; 842 case MAT_IGNORE_OFF_PROC_ENTRIES: 843 a->donotstash = flg; 844 break; 845 case MAT_SYMMETRIC: 846 case MAT_STRUCTURALLY_SYMMETRIC: 847 case MAT_HERMITIAN: 848 case MAT_SYMMETRY_ETERNAL: 849 case MAT_IGNORE_LOWER_TRIANGULAR: 850 case MAT_IGNORE_ZERO_ENTRIES: 851 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 852 break; 853 default: 854 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 855 } 856 PetscFunctionReturn(0); 857 } 858 859 860 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 861 { 862 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 863 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 864 const PetscScalar *l,*r; 865 PetscScalar x,*v; 866 PetscErrorCode ierr; 867 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 868 869 PetscFunctionBegin; 870 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 871 if (ll) { 872 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 873 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 874 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 875 for (i=0; i<m; i++) { 876 x = l[i]; 877 v = mat->v + i; 878 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 879 } 880 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 881 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 882 } 883 if (rr) { 884 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 885 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 886 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888 ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 889 
for (i=0; i<n; i++) { 890 x = r[i]; 891 v = mat->v + i*m; 892 for (j=0; j<m; j++) (*v++) *= x; 893 } 894 ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 895 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 896 } 897 PetscFunctionReturn(0); 898 } 899 900 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 901 { 902 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 903 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 904 PetscErrorCode ierr; 905 PetscInt i,j; 906 PetscReal sum = 0.0; 907 PetscScalar *v = mat->v; 908 909 PetscFunctionBegin; 910 if (mdn->size == 1) { 911 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 912 } else { 913 if (type == NORM_FROBENIUS) { 914 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 915 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 916 } 917 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 918 *nrm = PetscSqrtReal(*nrm); 919 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 920 } else if (type == NORM_1) { 921 PetscReal *tmp,*tmp2; 922 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 923 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 924 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 925 *nrm = 0.0; 926 v = mat->v; 927 for (j=0; j<mdn->A->cmap->n; j++) { 928 for (i=0; i<mdn->A->rmap->n; i++) { 929 tmp[j] += PetscAbsScalar(*v); v++; 930 } 931 } 932 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 933 for (j=0; j<A->cmap->N; j++) { 934 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 935 } 936 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 937 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 938 } else if (type == NORM_INFINITY) { /* max row norm */ 939 PetscReal ntemp; 940 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 941 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 942 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 943 } 944 PetscFunctionReturn(0); 945 } 946 947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 948 { 949 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 950 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 951 Mat B; 952 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 953 PetscErrorCode ierr; 954 PetscInt j,i; 955 PetscScalar *v; 956 957 PetscFunctionBegin; 958 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 959 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 960 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 961 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 962 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 963 } else { 964 B = *matout; 965 } 966 967 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 968 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 969 for (i=0; i<m; i++) rwork[i] = rstart + i; 970 for (j=0; j<n; j++) { 971 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 972 v += m; 973 } 974 ierr = PetscFree(rwork);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 978 *matout = B; 979 } else { 980 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 981 } 982 PetscFunctionReturn(0); 983 } 984 985 986 static PetscErrorCode 
MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 987 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 988 989 PetscErrorCode MatSetUp_MPIDense(Mat A) 990 { 991 PetscErrorCode ierr; 992 993 PetscFunctionBegin; 994 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 995 PetscFunctionReturn(0); 996 } 997 998 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 999 { 1000 PetscErrorCode ierr; 1001 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1002 1003 PetscFunctionBegin; 1004 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1005 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1010 { 1011 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1012 PetscErrorCode ierr; 1013 1014 PetscFunctionBegin; 1015 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode MatRealPart_MPIDense(Mat A) 1020 { 1021 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1022 PetscErrorCode ierr; 1023 1024 PetscFunctionBegin; 1025 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1030 { 1031 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1032 PetscErrorCode ierr; 1033 1034 PetscFunctionBegin; 1035 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1040 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1041 { 1042 PetscErrorCode ierr; 1043 PetscInt i,n; 1044 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1045 PetscReal *work; 1046 1047 PetscFunctionBegin; 1048 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1049 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1050 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1051 if (type == NORM_2) { 1052 for (i=0; i<n; i++) work[i] *= work[i]; 1053 } 1054 if (type == NORM_INFINITY) { 1055 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1056 } else { 1057 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1058 } 1059 ierr = PetscFree(work);CHKERRQ(ierr); 1060 if (type == NORM_2) { 1061 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1067 { 1068 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1069 PetscErrorCode ierr; 1070 PetscScalar *a; 1071 PetscInt m,n,i; 1072 1073 PetscFunctionBegin; 1074 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1075 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1076 for (i=0; i<m*n; i++) { 1077 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1078 } 1079 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1084 1085 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1086 { 1087 PetscFunctionBegin; 1088 *missing = PETSC_FALSE; 1089 PetscFunctionReturn(0); 1090 } 1091 1092 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*); 1093 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*); 1094 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1095 1096 /* 
-------------------------------------------------------------------*/ 1097 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1098 MatGetRow_MPIDense, 1099 MatRestoreRow_MPIDense, 1100 MatMult_MPIDense, 1101 /* 4*/ MatMultAdd_MPIDense, 1102 MatMultTranspose_MPIDense, 1103 MatMultTransposeAdd_MPIDense, 1104 0, 1105 0, 1106 0, 1107 /* 10*/ 0, 1108 0, 1109 0, 1110 0, 1111 MatTranspose_MPIDense, 1112 /* 15*/ MatGetInfo_MPIDense, 1113 MatEqual_MPIDense, 1114 MatGetDiagonal_MPIDense, 1115 MatDiagonalScale_MPIDense, 1116 MatNorm_MPIDense, 1117 /* 20*/ MatAssemblyBegin_MPIDense, 1118 MatAssemblyEnd_MPIDense, 1119 MatSetOption_MPIDense, 1120 MatZeroEntries_MPIDense, 1121 /* 24*/ MatZeroRows_MPIDense, 1122 0, 1123 0, 1124 0, 1125 0, 1126 /* 29*/ MatSetUp_MPIDense, 1127 0, 1128 0, 1129 MatGetDiagonalBlock_MPIDense, 1130 0, 1131 /* 34*/ MatDuplicate_MPIDense, 1132 0, 1133 0, 1134 0, 1135 0, 1136 /* 39*/ MatAXPY_MPIDense, 1137 MatCreateSubMatrices_MPIDense, 1138 0, 1139 MatGetValues_MPIDense, 1140 0, 1141 /* 44*/ 0, 1142 MatScale_MPIDense, 1143 MatShift_Basic, 1144 0, 1145 0, 1146 /* 49*/ MatSetRandom_MPIDense, 1147 0, 1148 0, 1149 0, 1150 0, 1151 /* 54*/ 0, 1152 0, 1153 0, 1154 0, 1155 0, 1156 /* 59*/ MatCreateSubMatrix_MPIDense, 1157 MatDestroy_MPIDense, 1158 MatView_MPIDense, 1159 0, 1160 0, 1161 /* 64*/ 0, 1162 0, 1163 0, 1164 0, 1165 0, 1166 /* 69*/ 0, 1167 0, 1168 0, 1169 0, 1170 0, 1171 /* 74*/ 0, 1172 0, 1173 0, 1174 0, 1175 0, 1176 /* 79*/ 0, 1177 0, 1178 0, 1179 0, 1180 /* 83*/ MatLoad_MPIDense, 1181 0, 1182 0, 1183 0, 1184 0, 1185 0, 1186 #if defined(PETSC_HAVE_ELEMENTAL) 1187 /* 89*/ MatMatMult_MPIDense_MPIDense, 1188 MatMatMultSymbolic_MPIDense_MPIDense, 1189 #else 1190 /* 89*/ 0, 1191 0, 1192 #endif 1193 MatMatMultNumeric_MPIDense, 1194 0, 1195 0, 1196 /* 94*/ 0, 1197 MatMatTransposeMult_MPIDense_MPIDense, 1198 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1199 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1200 0, 1201 /* 99*/ 0, 1202 0, 1203 0, 1204 MatConjugate_MPIDense, 1205 0, 1206 /*104*/ 0, 1207 MatRealPart_MPIDense, 1208 MatImaginaryPart_MPIDense, 1209 0, 1210 0, 1211 /*109*/ 0, 1212 0, 1213 0, 1214 0, 1215 MatMissingDiagonal_MPIDense, 1216 /*114*/ 0, 1217 0, 1218 0, 1219 0, 1220 0, 1221 /*119*/ 0, 1222 0, 1223 0, 1224 0, 1225 0, 1226 /*124*/ 0, 1227 MatGetColumnNorms_MPIDense, 1228 0, 1229 0, 1230 0, 1231 /*129*/ 0, 1232 MatTransposeMatMult_MPIDense_MPIDense, 1233 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1234 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1235 0, 1236 /*134*/ 0, 1237 0, 1238 0, 1239 0, 1240 0, 1241 /*139*/ 0, 1242 0, 1243 0, 1244 0, 1245 0, 1246 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense 1247 }; 1248 1249 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1250 { 1251 Mat_MPIDense *a; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated"); 1256 mat->preallocated = PETSC_TRUE; 1257 /* Note: For now, when data is specified above, this assumes the user correctly 1258 allocates the local dense storage space. We should add error checking. 
*/ 1259 1260 a = (Mat_MPIDense*)mat->data; 1261 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1262 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1263 a->nvec = mat->cmap->n; 1264 1265 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1266 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1267 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1268 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1269 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #if defined(PETSC_HAVE_ELEMENTAL) 1274 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1275 { 1276 Mat mat_elemental; 1277 PetscErrorCode ierr; 1278 PetscScalar *v; 1279 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1280 1281 PetscFunctionBegin; 1282 if (reuse == MAT_REUSE_MATRIX) { 1283 mat_elemental = *newmat; 1284 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1285 } else { 1286 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1287 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1288 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1289 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1290 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1291 } 1292 1293 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1294 for (i=0; i<N; i++) cols[i] = i; 1295 for (i=0; i<m; i++) rows[i] = rstart + i; 1296 1297 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1298 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1299 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1300 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1301 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1302 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1303 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1304 1305 if (reuse == MAT_INPLACE_MATRIX) { 1306 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1307 } else { 1308 *newmat = mat_elemental; 1309 } 1310 PetscFunctionReturn(0); 1311 } 1312 #endif 1313 1314 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1315 { 1316 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1317 PetscErrorCode ierr; 1318 1319 PetscFunctionBegin; 1320 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1325 { 1326 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1335 { 1336 PetscErrorCode ierr; 1337 Mat_MPIDense *mat; 1338 PetscInt m,nloc,N; 1339 1340 PetscFunctionBegin; 1341 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1342 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1343 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1344 PetscInt sum; 1345 1346 if (n == PETSC_DECIDE) { 1347 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1348 } 1349 /* Check sum(n) = N */ 1350 ierr = 
MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1351 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1352 1353 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1354 } 1355 1356 /* numeric phase */ 1357 mat = (Mat_MPIDense*)(*outmat)->data; 1358 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1359 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1360 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1365 { 1366 Mat_MPIDense *a; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1371 mat->data = (void*)a; 1372 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1373 1374 mat->insertmode = NOT_SET_VALUES; 1375 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1376 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1377 1378 /* build cache for off array entries formed */ 1379 a->donotstash = PETSC_FALSE; 1380 1381 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1382 1383 /* stuff used for matrix vector multiply */ 1384 a->lvec = 0; 1385 a->Mvctx = 0; 1386 a->roworiented = PETSC_TRUE; 1387 1388 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1389 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1390 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1391 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1392 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1393 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1394 #if defined(PETSC_HAVE_ELEMENTAL) 1395 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1396 #endif 1397 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1398 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1399 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1400 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1401 1402 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1403 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1404 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1405 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
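/*
   Usage sketch (illustrative only): temporarily backing a parallel dense matrix with a
   user-owned buffer via MatDensePlaceArray() and returning to the PETSc-allocated storage
   with MatDenseResetArray(). The buffer name myvals and the sizes m and N are assumptions
   of this sketch; the buffer must hold (local rows) x (global columns) entries in
   column-major order and stay valid until the reset.

     Mat         A;
     PetscScalar *myvals;
     PetscInt    m = 4, N = 8;
     ierr = MatCreateDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DECIDE,N,NULL,&A);CHKERRQ(ierr);
     ierr = PetscMalloc1(m*N,&myvals);CHKERRQ(ierr);
     ... fill myvals column by column ...
     ierr = MatDensePlaceArray(A,myvals);CHKERRQ(ierr);
     ... use A, e.g. MatMult() or MatNorm(); it now reads and writes myvals ...
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);
     ierr = PetscFree(myvals);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/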
/*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = 
PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1594 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1595 1596 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1597 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1598 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1599 1600 *newmat = mat; 1601 PetscFunctionReturn(0); 1602 } 1603 1604 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1605 { 1606 PetscErrorCode ierr; 1607 PetscMPIInt rank,size; 1608 const PetscInt *rowners; 1609 PetscInt i,m,n,nz,j,mMax; 1610 PetscScalar *array,*vals,*vals_ptr; 1611 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1612 1613 PetscFunctionBegin; 1614 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1615 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1616 1617 /* determine ownership of rows and columns */ 1618 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1619 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1620 1621 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1622 if (!a->A) { 1623 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1624 } 1625 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1626 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1627 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1628 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1629 if (!rank) { 1630 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1631 1632 /* read in my part of the matrix numerical values */ 1633 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1634 1635 /* insert into matrix-by row (this is why cannot directly read into array */ 1636 vals_ptr = vals; 1637 for (i=0; i<m; i++) { 1638 for (j=0; j<N; j++) { 1639 array[i + j*m] = *vals_ptr++; 1640 } 1641 } 1642 1643 /* read in other processors and ship out */ 1644 for (i=1; i<size; i++) { 1645 nz = (rowners[i+1] - rowners[i])*N; 1646 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1647 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1648 } 1649 } else { 1650 /* receive numeric values */ 1651 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1652 1653 /* receive message of values*/ 1654 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1655 1656 /* insert into matrix-by row (this is why cannot directly read into array */ 1657 vals_ptr = vals; 1658 for (i=0; i<m; i++) { 1659 for (j=0; j<N; j++) { 1660 array[i + j*m] = *vals_ptr++; 1661 } 1662 } 1663 } 1664 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1665 ierr = PetscFree(vals);CHKERRQ(ierr); 1666 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1672 { 1673 Mat_MPIDense *a; 1674 PetscScalar *vals,*svals; 1675 MPI_Comm comm; 1676 MPI_Status status; 1677 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1678 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1679 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1680 PetscInt i,nz,j,rstart,rend; 1681 int fd; 1682 PetscBool isbinary; 1683 PetscErrorCode ierr; 1684 1685 PetscFunctionBegin; 1686 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1687 if (!isbinary) 
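/* loading a dense matrix is only supported from a binary viewer, so error out for any other viewer type */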
SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 1688 1689 /* force binary viewer to load .info file if it has not yet done so */ 1690 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1691 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1692 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1693 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1694 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1695 if (!rank) { 1696 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1697 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1698 } 1699 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1700 M = header[1]; N = header[2]; nz = header[3]; 1701 1702 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1703 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1704 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1705 1706 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1707 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1708 1709 /* 1710 Handle case where matrix is stored on disk as a dense matrix 1711 */ 1712 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1713 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 1717 /* determine ownership of all rows */ 1718 if (newmat->rmap->n < 0) { 1719 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1720 } else { 1721 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1722 } 1723 if (newmat->cmap->n < 0) { 1724 n = PETSC_DECIDE; 1725 } else { 1726 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1727 } 1728 1729 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1730 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1731 rowners[0] = 0; 1732 for (i=2; i<=size; i++) { 1733 rowners[i] += rowners[i-1]; 1734 } 1735 rstart = rowners[rank]; 1736 rend = rowners[rank+1]; 1737 1738 /* distribute row lengths to all processors */ 1739 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1740 if (!rank) { 1741 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1742 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1743 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1744 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1745 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1746 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1747 } else { 1748 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1749 } 1750 1751 if (!rank) { 1752 /* calculate the number of nonzeros on each processor */ 1753 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1754 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1755 for (i=0; i<size; i++) { 1756 for (j=rowners[i]; j< rowners[i+1]; j++) { 1757 procsnz[i] += rowlengths[j]; 1758 } 1759 } 1760 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1761 1762 /* determine max buffer needed and allocate it */ 1763 maxnz = 0; 1764 for (i=0; i<size; i++) { 1765 maxnz = 
PetscMax(maxnz,procsnz[i]); 1766 } 1767 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1768 1769 /* read in my part of the matrix column indices */ 1770 nz = procsnz[0]; 1771 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1772 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1773 1774 /* read in every one elses and ship off */ 1775 for (i=1; i<size; i++) { 1776 nz = procsnz[i]; 1777 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1778 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1779 } 1780 ierr = PetscFree(cols);CHKERRQ(ierr); 1781 } else { 1782 /* determine buffer space needed for message */ 1783 nz = 0; 1784 for (i=0; i<m; i++) { 1785 nz += ourlens[i]; 1786 } 1787 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1788 1789 /* receive message of column indices*/ 1790 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1791 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1792 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1793 } 1794 1795 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1796 a = (Mat_MPIDense*)newmat->data; 1797 if (!a->A) { 1798 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1799 } 1800 1801 if (!rank) { 1802 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1803 1804 /* read in my part of the matrix numerical values */ 1805 nz = procsnz[0]; 1806 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1807 1808 /* insert into matrix */ 1809 jj = rstart; 1810 smycols = mycols; 1811 svals = vals; 1812 for (i=0; i<m; i++) { 1813 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1814 smycols += ourlens[i]; 1815 svals += ourlens[i]; 1816 jj++; 1817 } 1818 1819 /* read in other processors and ship out */ 1820 for (i=1; i<size; i++) { 1821 nz = procsnz[i]; 1822 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1823 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1824 } 1825 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1826 } else { 1827 /* receive numeric values */ 1828 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1829 1830 /* receive message of values*/ 1831 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1832 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1833 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1834 1835 /* insert into matrix */ 1836 jj = rstart; 1837 smycols = mycols; 1838 svals = vals; 1839 for (i=0; i<m; i++) { 1840 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1841 smycols += ourlens[i]; 1842 svals += ourlens[i]; 1843 jj++; 1844 } 1845 } 1846 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1847 ierr = PetscFree(vals);CHKERRQ(ierr); 1848 ierr = PetscFree(mycols);CHKERRQ(ierr); 1849 ierr = PetscFree(rowners);CHKERRQ(ierr); 1850 1851 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1852 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1853 PetscFunctionReturn(0); 1854 } 1855 1856 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1857 { 1858 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1859 Mat a,b; 1860 PetscBool flg; 1861 PetscErrorCode ierr; 1862 1863 PetscFunctionBegin; 1864 a = matA->A; 1865 b = matB->A; 1866 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1867 ierr = 
MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1868 PetscFunctionReturn(0); 1869 } 1870 1871 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1872 { 1873 PetscErrorCode ierr; 1874 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1875 Mat_TransMatMultDense *atb = a->atbdense; 1876 1877 PetscFunctionBegin; 1878 ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1879 ierr = (atb->destroy)(A);CHKERRQ(ierr); 1880 ierr = PetscFree(atb);CHKERRQ(ierr); 1881 PetscFunctionReturn(0); 1882 } 1883 1884 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A) 1885 { 1886 PetscErrorCode ierr; 1887 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1888 Mat_MatTransMultDense *abt = a->abtdense; 1889 1890 PetscFunctionBegin; 1891 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 1892 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 1893 ierr = (abt->destroy)(A);CHKERRQ(ierr); 1894 ierr = PetscFree(abt);CHKERRQ(ierr); 1895 PetscFunctionReturn(0); 1896 } 1897 1898 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1899 { 1900 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1901 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1902 Mat_TransMatMultDense *atb = c->atbdense; 1903 PetscErrorCode ierr; 1904 MPI_Comm comm; 1905 PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1906 PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1907 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1908 PetscScalar _DOne=1.0,_DZero=0.0; 1909 PetscBLASInt am,an,bn,aN; 1910 const PetscInt *ranges; 1911 1912 PetscFunctionBegin; 1913 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1914 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1915 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1916 1917 /* compute atbarray = aseq^T * bseq */ 1918 ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1919 ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1920 ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1921 ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1922 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1923 1924 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1925 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1926 1927 /* arrange atbarray into sendbuf */ 1928 k = 0; 1929 for (proc=0; proc<size; proc++) { 1930 for (j=0; j<cN; j++) { 1931 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1932 } 1933 } 1934 /* sum all atbarray to local values of C */ 1935 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 1936 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1937 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1942 { 1943 PetscErrorCode ierr; 1944 Mat Cdense; 1945 MPI_Comm comm; 1946 PetscMPIInt size; 1947 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1948 Mat_MPIDense *c; 1949 Mat_TransMatMultDense *atb; 1950 1951 PetscFunctionBegin; 1952 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1953 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1954 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local 
dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1955 } 1956 1957 /* create matrix product Cdense */ 1958 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 1959 ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1960 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 1961 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 1962 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 *C = Cdense; 1965 1966 /* create data structure for reuse Cdense */ 1967 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1968 ierr = PetscNew(&atb);CHKERRQ(ierr); 1969 cM = Cdense->rmap->N; 1970 ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 1971 1972 c = (Mat_MPIDense*)Cdense->data; 1973 c->atbdense = atb; 1974 atb->destroy = Cdense->ops->destroy; 1975 Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 1976 PetscFunctionReturn(0); 1977 } 1978 1979 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1980 { 1981 PetscErrorCode ierr; 1982 1983 PetscFunctionBegin; 1984 if (scall == MAT_INITIAL_MATRIX) { 1985 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1986 } 1987 ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1988 PetscFunctionReturn(0); 1989 } 1990 1991 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C) 1992 { 1993 PetscErrorCode ierr; 1994 Mat Cdense; 1995 MPI_Comm comm; 1996 PetscMPIInt i, size; 1997 PetscInt maxRows, bufsiz; 1998 Mat_MPIDense *c; 1999 PetscMPIInt tag; 2000 const char *algTypes[2] = {"allgatherv","cyclic"}; 2001 PetscInt alg, nalg = 2; 2002 Mat_MatTransMultDense *abt; 2003 2004 PetscFunctionBegin; 2005 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2006 alg = 0; /* default is allgatherv */ 2007 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2008 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr); 2009 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2010 if (A->cmap->N != B->cmap->N) { 2011 SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N); 2012 } 2013 2014 /* create matrix product Cdense */ 2015 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 2016 ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2017 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 2018 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 2019 ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr); 2020 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2021 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2022 *C = Cdense; 2023 2024 /* create data structure for reuse Cdense */ 2025 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2026 ierr = PetscNew(&abt);CHKERRQ(ierr); 2027 abt->tag = tag; 2028 abt->alg = alg; 2029 switch (alg) { 2030 case 1: 2031 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2032 bufsiz = A->cmap->N * maxRows; 2033 ierr = 
PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2034 break; 2035 default: 2036 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2037 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2038 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2039 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2040 break; 2041 } 2042 2043 c = (Mat_MPIDense*)Cdense->data; 2044 c->abtdense = abt; 2045 abt->destroy = Cdense->ops->destroy; 2046 Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2047 PetscFunctionReturn(0); 2048 } 2049 2050 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2051 { 2052 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2053 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 2054 Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 2055 Mat_MatTransMultDense *abt = c->abtdense; 2056 PetscErrorCode ierr; 2057 MPI_Comm comm; 2058 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2059 PetscScalar *sendbuf, *recvbuf=0, *carray; 2060 PetscInt i,cK=A->cmap->N,k,j,bn; 2061 PetscScalar _DOne=1.0,_DZero=0.0; 2062 PetscBLASInt cm, cn, ck; 2063 MPI_Request reqs[2]; 2064 const PetscInt *ranges; 2065 2066 PetscFunctionBegin; 2067 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2068 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2069 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2070 2071 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2072 bn = B->rmap->n; 2073 if (bseq->lda == bn) { 2074 sendbuf = bseq->v; 2075 } else { 2076 sendbuf = abt->buf[0]; 2077 for (k = 0, i = 0; i < cK; i++) { 2078 for (j = 0; j < bn; j++, k++) { 2079 sendbuf[k] = bseq->v[i * bseq->lda + j]; 2080 } 2081 } 2082 } 2083 if (size > 1) { 2084 sendto = (rank + size - 1) % size; 2085 recvfrom = (rank + size + 1) % size; 2086 } else { 2087 sendto = recvfrom = 0; 2088 } 2089 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2090 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2091 recvisfrom = rank; 2092 for (i = 0; i < size; i++) { 2093 /* we have finished receiving in sending, bufs can be read/modified */ 2094 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2095 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2096 2097 if (nextrecvisfrom != rank) { 2098 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2099 sendsiz = cK * bn; 2100 recvsiz = cK * nextbn; 2101 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2102 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2103 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2104 } 2105 2106 /* local aseq * sendbuf^T */ 2107 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2108 carray = &cseq->v[cseq->lda * ranges[recvisfrom]]; 2109 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda)); 2110 2111 if (nextrecvisfrom != rank) { 2112 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2113 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2114 } 2115 bn = nextbn; 2116 recvisfrom = nextrecvisfrom; 2117 sendbuf = recvbuf; 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2123 { 2124 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2125 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 2126 Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 2127 Mat_MatTransMultDense *abt = c->abtdense; 2128 PetscErrorCode ierr; 2129 MPI_Comm comm; 2130 PetscMPIInt rank,size; 2131 PetscScalar *sendbuf, *recvbuf; 2132 PetscInt i,cK=A->cmap->N,k,j,bn; 2133 PetscScalar _DOne=1.0,_DZero=0.0; 2134 PetscBLASInt cm, cn, ck; 2135 2136 PetscFunctionBegin; 2137 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2138 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2139 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2140 2141 /* copy transpose of B into buf[0] */ 2142 bn = B->rmap->n; 2143 sendbuf = abt->buf[0]; 2144 recvbuf = abt->buf[1]; 2145 for (k = 0, j = 0; j < bn; j++) { 2146 for (i = 0; i < cK; i++, k++) { 2147 sendbuf[k] = bseq->v[i * bseq->lda + j]; 2148 } 2149 } 2150 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2151 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2152 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2153 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2154 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda)); 2155 PetscFunctionReturn(0); 2156 } 2157 2158 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2159 { 2160 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2161 Mat_MatTransMultDense *abt = c->abtdense; 2162 PetscErrorCode ierr; 2163 2164 PetscFunctionBegin; 2165 switch (abt->alg) { 2166 case 1: 2167 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2168 break; 2169 default: 2170 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2171 break; 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C) 2177 { 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 if (scall == MAT_INITIAL_MATRIX) { 2182 ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2183 } 2184 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2189 { 2190 PetscErrorCode ierr; 2191 Mat_MPIDense 
*a = (Mat_MPIDense*)A->data;
2192 Mat_MatMultDense *ab = a->abdense;
2193
2194 PetscFunctionBegin;
2195 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2196 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2197 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2198
2199 ierr = (ab->destroy)(A);CHKERRQ(ierr);
2200 ierr = PetscFree(ab);CHKERRQ(ierr);
2201 PetscFunctionReturn(0);
2202 }
2203
2204 #if defined(PETSC_HAVE_ELEMENTAL)
2205 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2206 {
2207 PetscErrorCode ierr;
2208 Mat_MPIDense *c=(Mat_MPIDense*)C->data;
2209 Mat_MatMultDense *ab=c->abdense;
2210
2211 PetscFunctionBegin;
2212 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2213 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2214 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2215 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2216 PetscFunctionReturn(0);
2217 }
2218
2219 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2220 {
2221 PetscErrorCode ierr;
2222 Mat Ae,Be,Ce;
2223 Mat_MPIDense *c;
2224 Mat_MatMultDense *ab;
2225
2226 PetscFunctionBegin;
2227 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2228 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2229 }
2230
2231 /* convert A and B to Elemental matrices Ae and Be */
2232 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2233 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2234
2235 /* Ce = Ae*Be */
2236 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2237 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2238
2239 /* convert Ce to C */
2240 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2241
2242 /* create data structure for reusing the product C */
2243 ierr = PetscNew(&ab);CHKERRQ(ierr);
2244 c = (Mat_MPIDense*)(*C)->data;
2245 c->abdense = ab;
2246
2247 ab->Ae = Ae;
2248 ab->Be = Be;
2249 ab->Ce = Ce;
2250 ab->destroy = (*C)->ops->destroy;
2251 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2252 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2253 PetscFunctionReturn(0);
2254 }
2255
2256 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2257 {
2258 PetscErrorCode ierr;
2259
2260 PetscFunctionBegin;
2261 if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
2262 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2263 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2264 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2265 } else {
2266 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2267 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2268 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2269 }
2270 PetscFunctionReturn(0);
2271 }
2272 #endif
2273
2274
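/*
  Usage note (an illustrative sketch added for clarity; the product matrices C and D are
  hypothetical names): the parallel dense products implemented above are normally reached
  through the generic interfaces. With two assembled MATMPIDENSE matrices A and B of
  compatible layout one might write, for example,

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);    -- C = A^T B
     ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr);    -- D = A B^T

  and later recompute the products with MAT_REUSE_MATRIX after the entries of A and B change.
  The algorithm behind MatMatTransposeMult() is selected at runtime with
  -matmattransmult_mpidense_mpidense_via allgatherv (the default) or cyclic, as registered in
  MatMatTransposeMultSymbolic_MPIDense_MPIDense() above. MatMatMult() between two MATMPIDENSE
  matrices is available only when PETSc is configured with Elemental (PETSC_HAVE_ELEMENTAL);
  the operands are then converted to MATELEMENTAL, multiplied there, and the result converted
  back to MATMPIDENSE.
*/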