/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>   /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend =
mat->rmap->rend,row; 106 PetscBool roworiented = A->roworiented; 107 108 PetscFunctionBegin; 109 for (i=0; i<m; i++) { 110 if (idxm[i] < 0) continue; 111 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 112 if (idxm[i] >= rstart && idxm[i] < rend) { 113 row = idxm[i] - rstart; 114 if (roworiented) { 115 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 116 } else { 117 for (j=0; j<n; j++) { 118 if (idxn[j] < 0) continue; 119 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 120 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 121 } 122 } 123 } else if (!A->donotstash) { 124 mat->assembled = PETSC_FALSE; 125 if (roworiented) { 126 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 127 } else { 128 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 129 } 130 } 131 } 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatGetValues_MPIDense" 137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 138 { 139 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 140 PetscErrorCode ierr; 141 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 142 143 PetscFunctionBegin; 144 for (i=0; i<m; i++) { 145 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 146 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 147 if (idxm[i] >= rstart && idxm[i] < rend) { 148 row = idxm[i] - rstart; 149 for (j=0; j<n; j++) { 150 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 151 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 152 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 153 } 154 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatDenseGetArray_MPIDense" 161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 162 { 163 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 164 PetscErrorCode ierr; 165 166 PetscFunctionBegin; 167 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 174 { 175 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 176 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 177 PetscErrorCode ierr; 178 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 179 const PetscInt *irow,*icol; 180 PetscScalar *av,*bv,*v = lmat->v; 181 Mat newmat; 182 IS iscol_local; 183 184 PetscFunctionBegin; 185 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 186 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 187 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 188 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 189 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 190 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 191 192 /* No parallel redistribution currently supported! Should really check each index set 193 to comfirm that it is OK. ... 
Currently supports only submatrix same partitioning as 194 original matrix! */ 195 196 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 197 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 198 199 /* Check submatrix call */ 200 if (scall == MAT_REUSE_MATRIX) { 201 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 202 /* Really need to test rows and column sizes! */ 203 newmat = *B; 204 } else { 205 /* Create and fill new matrix */ 206 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 207 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 208 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 209 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 210 } 211 212 /* Now extract the data pointers and do the copy, column at a time */ 213 newmatd = (Mat_MPIDense*)newmat->data; 214 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 215 216 for (i=0; i<Ncols; i++) { 217 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 218 for (j=0; j<nrows; j++) { 219 *bv++ = av[irow[j] - rstart]; 220 } 221 } 222 223 /* Assemble the matrices so that the correct flags are set */ 224 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 226 227 /* Free work space */ 228 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 229 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 230 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 231 *B = newmat; 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 238 { 239 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 240 PetscErrorCode ierr; 241 242 PetscFunctionBegin; 243 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 250 { 251 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 252 MPI_Comm comm; 253 PetscErrorCode ierr; 254 PetscInt nstash,reallocs; 255 InsertMode addv; 256 257 PetscFunctionBegin; 258 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 259 /* make sure all processors are either in INSERTMODE or ADDMODE */ 260 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 261 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 262 mat->insertmode = addv; /* in case this processor had no cache */ 263 264 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 265 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 266 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 273 { 274 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 275 PetscErrorCode ierr; 276 PetscInt i,*row,*col,flg,j,rstart,ncols; 277 PetscMPIInt n; 278 PetscScalar *val; 279 280 PetscFunctionBegin; 281 /* wait on receives */ 282 while (1) { 283 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 284 if (!flg) break; 285 
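/* assumption (added comment): the loop below inserts the off-process values received in the stash, issuing one MatSetValues call per contiguous run of entries that share a row */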
286 for (i=0; i<n;) { 287 /* Now identify the consecutive vals belonging to the same row */ 288 for (j=i,rstart=row[j]; j<n; j++) { 289 if (row[j] != rstart) break; 290 } 291 if (j < n) ncols = j-i; 292 else ncols = n-i; 293 /* Now assemble all these values with a single function call */ 294 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 295 i = j; 296 } 297 } 298 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 299 300 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 301 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 302 303 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 304 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 305 } 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatZeroEntries_MPIDense" 311 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 312 { 313 PetscErrorCode ierr; 314 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 315 316 PetscFunctionBegin; 317 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 /* the code does not do the diagonal entries correctly unless the 322 matrix is square and the column and row owerships are identical. 323 This is a BUG. The only way to fix it seems to be to access 324 mdn->A and mdn->B directly and not through the MatZeroRows() 325 routine. 326 */ 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatZeroRows_MPIDense" 329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 330 { 331 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 332 PetscErrorCode ierr; 333 PetscInt i,*owners = A->rmap->range; 334 PetscInt *sizes,j,idx,nsends; 335 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 336 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 337 PetscInt *lens,*lrows,*values; 338 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 339 MPI_Comm comm; 340 MPI_Request *send_waits,*recv_waits; 341 MPI_Status recv_status,*send_status; 342 PetscBool found; 343 const PetscScalar *xx; 344 PetscScalar *bb; 345 346 PetscFunctionBegin; 347 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 348 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 349 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 350 /* first count number of contributors to each processor */ 351 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 352 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 353 for (i=0; i<N; i++) { 354 idx = rows[i]; 355 found = PETSC_FALSE; 356 for (j=0; j<size; j++) { 357 if (idx >= owners[j] && idx < owners[j+1]) { 358 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 359 } 360 } 361 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 362 } 363 nsends = 0; 364 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 365 366 /* inform other processors of number of messages and max length*/ 367 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 368 369 /* post receives: */ 370 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 371 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 372 for (i=0; i<nrecvs; i++) { 373 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 374 } 375 376 /* do sends: 377 1) starts[i] gives the starting index in svalues for stuff going to 378 the ith processor 379 */ 380 ierr = 
PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 381 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 382 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 383 384 starts[0] = 0; 385 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 386 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 387 388 starts[0] = 0; 389 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 390 count = 0; 391 for (i=0; i<size; i++) { 392 if (sizes[2*i+1]) { 393 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 394 } 395 } 396 ierr = PetscFree(starts);CHKERRQ(ierr); 397 398 base = owners[rank]; 399 400 /* wait on receives */ 401 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 402 count = nrecvs; 403 slen = 0; 404 while (count) { 405 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 406 /* unpack receives into our local space */ 407 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 408 409 source[imdex] = recv_status.MPI_SOURCE; 410 lens[imdex] = n; 411 slen += n; 412 count--; 413 } 414 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 415 416 /* move the data into the send scatter */ 417 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 418 count = 0; 419 for (i=0; i<nrecvs; i++) { 420 values = rvalues + i*nmax; 421 for (j=0; j<lens[i]; j++) { 422 lrows[count++] = values[j] - base; 423 } 424 } 425 ierr = PetscFree(rvalues);CHKERRQ(ierr); 426 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 427 ierr = PetscFree(owner);CHKERRQ(ierr); 428 ierr = PetscFree(sizes);CHKERRQ(ierr); 429 430 /* fix right hand side if needed */ 431 if (x && b) { 432 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 433 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 434 for (i=0; i<slen; i++) { 435 bb[lrows[i]] = diag*xx[lrows[i]]; 436 } 437 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 438 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 439 } 440 441 /* actually zap the local rows */ 442 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 443 if (diag != 0.0) { 444 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 445 PetscInt m = ll->lda, i; 446 447 for (i=0; i<slen; i++) { 448 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 449 } 450 } 451 ierr = PetscFree(lrows);CHKERRQ(ierr); 452 453 /* wait on sends */ 454 if (nsends) { 455 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 456 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 457 ierr = PetscFree(send_status);CHKERRQ(ierr); 458 } 459 ierr = PetscFree(send_waits);CHKERRQ(ierr); 460 ierr = PetscFree(svalues);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatMult_MPIDense" 466 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 467 { 468 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 469 PetscErrorCode ierr; 470 471 PetscFunctionBegin; 472 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 474 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatMultAdd_MPIDense" 480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 483 PetscErrorCode ierr; 484 485 PetscFunctionBegin; 486 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 487 ierr = 
VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 488 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatMultTranspose_MPIDense" 494 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 495 { 496 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 497 PetscErrorCode ierr; 498 PetscScalar zero = 0.0; 499 500 PetscFunctionBegin; 501 ierr = VecSet(yy,zero);CHKERRQ(ierr); 502 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 503 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 504 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 510 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 513 PetscErrorCode ierr; 514 515 PetscFunctionBegin; 516 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 517 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 518 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 519 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatGetDiagonal_MPIDense" 525 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 526 { 527 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 528 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 529 PetscErrorCode ierr; 530 PetscInt len,i,n,m = A->rmap->n,radd; 531 PetscScalar *x,zero = 0.0; 532 533 PetscFunctionBegin; 534 ierr = VecSet(v,zero);CHKERRQ(ierr); 535 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 536 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 537 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 538 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 539 radd = A->rmap->rstart*m; 540 for (i=0; i<len; i++) { 541 x[i] = aloc->v[radd + i*m + i]; 542 } 543 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatDestroy_MPIDense" 549 PetscErrorCode MatDestroy_MPIDense(Mat mat) 550 { 551 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 552 PetscErrorCode ierr; 553 554 PetscFunctionBegin; 555 #if defined(PETSC_USE_LOG) 556 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 557 #endif 558 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 559 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 560 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 561 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 562 563 ierr = PetscFree(mat->data);CHKERRQ(ierr); 564 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 565 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 568 #if defined(PETSC_HAVE_ELEMENTAL) 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 570 #endif 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 574 
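/* assumption (added comment): composing NULL here detaches the function pointers that MatCreate_MPIDense() attached to the object */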
ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatView_MPIDense_Binary" 584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 585 { 586 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 587 PetscErrorCode ierr; 588 PetscViewerFormat format; 589 int fd; 590 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 591 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 592 PetscScalar *work,*v,*vv; 593 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 594 595 PetscFunctionBegin; 596 if (mdn->size == 1) { 597 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 598 } else { 599 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 600 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 601 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 602 603 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 604 if (format == PETSC_VIEWER_NATIVE) { 605 606 if (!rank) { 607 /* store the matrix as a dense matrix */ 608 header[0] = MAT_FILE_CLASSID; 609 header[1] = mat->rmap->N; 610 header[2] = N; 611 header[3] = MATRIX_BINARY_FORMAT_DENSE; 612 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 613 614 /* get largest work array needed for transposing array */ 615 mmax = mat->rmap->n; 616 for (i=1; i<size; i++) { 617 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 618 } 619 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 620 621 /* write out local array, by rows */ 622 m = mat->rmap->n; 623 v = a->v; 624 for (j=0; j<N; j++) { 625 for (i=0; i<m; i++) { 626 work[j + i*N] = *v++; 627 } 628 } 629 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 630 /* get largest work array to receive messages from other processes, excludes process zero */ 631 mmax = 0; 632 for (i=1; i<size; i++) { 633 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 634 } 635 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 636 for (k = 1; k < size; k++) { 637 v = vv; 638 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 639 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 641 for (j = 0; j < N; j++) { 642 for (i = 0; i < m; i++) { 643 work[j + i*N] = *v++; 644 } 645 } 646 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 647 } 648 ierr = PetscFree(work);CHKERRQ(ierr); 649 ierr = PetscFree(vv);CHKERRQ(ierr); 650 } else { 651 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 652 } 653 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 654 } 655 PetscFunctionReturn(0); 656 } 657 658 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 659 #include <petscdraw.h> 660 #undef __FUNCT__ 661 #define __FUNCT__ 
"MatView_MPIDense_ASCIIorDraworSocket" 662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 663 { 664 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 665 PetscErrorCode ierr; 666 PetscMPIInt rank = mdn->rank; 667 PetscViewerType vtype; 668 PetscBool iascii,isdraw; 669 PetscViewer sviewer; 670 PetscViewerFormat format; 671 672 PetscFunctionBegin; 673 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 674 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 675 if (iascii) { 676 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 677 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 678 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 679 MatInfo info; 680 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 681 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 683 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 684 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 685 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } else if (format == PETSC_VIEWER_ASCII_INFO) { 688 PetscFunctionReturn(0); 689 } 690 } else if (isdraw) { 691 PetscDraw draw; 692 PetscBool isnull; 693 694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 696 if (isnull) PetscFunctionReturn(0); 697 } 698 699 { 700 /* assemble the entire matrix onto first processor. */ 701 Mat A; 702 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 703 PetscInt *cols; 704 PetscScalar *vals; 705 706 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 707 if (!rank) { 708 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 709 } else { 710 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 711 } 712 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 713 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 714 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 715 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 716 717 /* Copy the matrix ... 
This isn't the most efficient means, 718 but it's quick for now */ 719 A->insertmode = INSERT_VALUES; 720 721 row = mat->rmap->rstart; 722 m = mdn->A->rmap->n; 723 for (i=0; i<m; i++) { 724 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 725 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 726 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 727 row++; 728 } 729 730 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 731 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 732 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 733 if (!rank) { 734 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 735 } 736 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 737 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 738 ierr = MatDestroy(&A);CHKERRQ(ierr); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatView_MPIDense" 745 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 746 { 747 PetscErrorCode ierr; 748 PetscBool iascii,isbinary,isdraw,issocket; 749 750 PetscFunctionBegin; 751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 752 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 753 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 754 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 755 756 if (iascii || issocket || isdraw) { 757 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 758 } else if (isbinary) { 759 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 760 } 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatGetInfo_MPIDense" 766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 767 { 768 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 769 Mat mdn = mat->A; 770 PetscErrorCode ierr; 771 PetscReal isend[5],irecv[5]; 772 773 PetscFunctionBegin; 774 info->block_size = 1.0; 775 776 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 777 778 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 779 isend[3] = info->memory; isend[4] = info->mallocs; 780 if (flag == MAT_LOCAL) { 781 info->nz_used = isend[0]; 782 info->nz_allocated = isend[1]; 783 info->nz_unneeded = isend[2]; 784 info->memory = isend[3]; 785 info->mallocs = isend[4]; 786 } else if (flag == MAT_GLOBAL_MAX) { 787 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 788 789 info->nz_used = irecv[0]; 790 info->nz_allocated = irecv[1]; 791 info->nz_unneeded = irecv[2]; 792 info->memory = irecv[3]; 793 info->mallocs = irecv[4]; 794 } else if (flag == MAT_GLOBAL_SUM) { 795 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 796 797 info->nz_used = irecv[0]; 798 info->nz_allocated = irecv[1]; 799 info->nz_unneeded = irecv[2]; 800 info->memory = irecv[3]; 801 info->mallocs = irecv[4]; 802 } 803 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 804 info->fill_ratio_needed = 0; 805 info->factor_mallocs = 0; 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatSetOption_MPIDense" 811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 812 { 813 Mat_MPIDense *a = 
(Mat_MPIDense*)A->data; 814 PetscErrorCode ierr; 815 816 PetscFunctionBegin; 817 switch (op) { 818 case MAT_NEW_NONZERO_LOCATIONS: 819 case MAT_NEW_NONZERO_LOCATION_ERR: 820 case MAT_NEW_NONZERO_ALLOCATION_ERR: 821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 822 break; 823 case MAT_ROW_ORIENTED: 824 a->roworiented = flg; 825 826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 827 break; 828 case MAT_NEW_DIAGONALS: 829 case MAT_KEEP_NONZERO_PATTERN: 830 case MAT_USE_HASH_TABLE: 831 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 832 break; 833 case MAT_IGNORE_OFF_PROC_ENTRIES: 834 a->donotstash = flg; 835 break; 836 case MAT_SYMMETRIC: 837 case MAT_STRUCTURALLY_SYMMETRIC: 838 case MAT_HERMITIAN: 839 case MAT_SYMMETRY_ETERNAL: 840 case MAT_IGNORE_LOWER_TRIANGULAR: 841 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 842 break; 843 default: 844 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 845 } 846 PetscFunctionReturn(0); 847 } 848 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatDiagonalScale_MPIDense" 852 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 853 { 854 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 855 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 856 PetscScalar *l,*r,x,*v; 857 PetscErrorCode ierr; 858 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 859 860 PetscFunctionBegin; 861 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 862 if (ll) { 863 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 864 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 865 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 866 for (i=0; i<m; i++) { 867 x = l[i]; 868 v = mat->v + i; 869 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 870 } 871 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 872 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 873 } 874 if (rr) { 875 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 876 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 877 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 880 for (i=0; i<n; i++) { 881 x = r[i]; 882 v = mat->v + i*m; 883 for (j=0; j<m; j++) (*v++) *= x; 884 } 885 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 886 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 887 } 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "MatNorm_MPIDense" 893 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 894 { 895 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 896 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 897 PetscErrorCode ierr; 898 PetscInt i,j; 899 PetscReal sum = 0.0; 900 PetscScalar *v = mat->v; 901 902 PetscFunctionBegin; 903 if (mdn->size == 1) { 904 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 905 } else { 906 if (type == NORM_FROBENIUS) { 907 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 908 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 909 } 910 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 911 *nrm = PetscSqrtReal(*nrm); 912 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 913 } else if (type == NORM_1) { 914 PetscReal *tmp,*tmp2; 915 ierr = 
PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 916 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 917 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 918 *nrm = 0.0; 919 v = mat->v; 920 for (j=0; j<mdn->A->cmap->n; j++) { 921 for (i=0; i<mdn->A->rmap->n; i++) { 922 tmp[j] += PetscAbsScalar(*v); v++; 923 } 924 } 925 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 926 for (j=0; j<A->cmap->N; j++) { 927 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 928 } 929 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 930 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 931 } else if (type == NORM_INFINITY) { /* max row norm */ 932 PetscReal ntemp; 933 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 934 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 935 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 936 } 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "MatTranspose_MPIDense" 942 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 943 { 944 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 945 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 946 Mat B; 947 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 948 PetscErrorCode ierr; 949 PetscInt j,i; 950 PetscScalar *v; 951 952 PetscFunctionBegin; 953 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 954 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 955 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 956 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 957 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 958 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 959 } else { 960 B = *matout; 961 } 962 963 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 964 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 965 for (i=0; i<m; i++) rwork[i] = rstart + i; 966 for (j=0; j<n; j++) { 967 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 968 v += m; 969 } 970 ierr = PetscFree(rwork);CHKERRQ(ierr); 971 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 974 *matout = B; 975 } else { 976 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 977 } 978 PetscFunctionReturn(0); 979 } 980 981 982 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 983 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatSetUp_MPIDense" 987 PetscErrorCode MatSetUp_MPIDense(Mat A) 988 { 989 PetscErrorCode ierr; 990 991 PetscFunctionBegin; 992 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatAXPY_MPIDense" 998 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 999 { 1000 PetscErrorCode ierr; 1001 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1002 1003 PetscFunctionBegin; 1004 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1005 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatConjugate_MPIDense" 1011 
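/* conjugates the locally stored entries in place by deferring to the sequential dense block */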
PetscErrorCode MatConjugate_MPIDense(Mat mat) 1012 { 1013 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1014 PetscErrorCode ierr; 1015 1016 PetscFunctionBegin; 1017 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatRealPart_MPIDense" 1023 PetscErrorCode MatRealPart_MPIDense(Mat A) 1024 { 1025 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1026 PetscErrorCode ierr; 1027 1028 PetscFunctionBegin; 1029 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1035 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1036 { 1037 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1038 PetscErrorCode ierr; 1039 1040 PetscFunctionBegin; 1041 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1048 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1049 { 1050 PetscErrorCode ierr; 1051 PetscInt i,n; 1052 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1053 PetscReal *work; 1054 1055 PetscFunctionBegin; 1056 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1057 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1058 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1059 if (type == NORM_2) { 1060 for (i=0; i<n; i++) work[i] *= work[i]; 1061 } 1062 if (type == NORM_INFINITY) { 1063 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1064 } else { 1065 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(work);CHKERRQ(ierr); 1068 if (type == NORM_2) { 1069 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1070 } 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatSetRandom_MPIDense" 1076 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1077 { 1078 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1079 PetscErrorCode ierr; 1080 PetscScalar *a; 1081 PetscInt m,n,i; 1082 1083 PetscFunctionBegin; 1084 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1085 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1086 for (i=0; i<m*n; i++) { 1087 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1088 } 1089 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatMissingDiagonal_MPIDense" 1097 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1098 { 1099 PetscFunctionBegin; 1100 *missing = PETSC_FALSE; 1101 PetscFunctionReturn(0); 1102 } 1103 1104 /* -------------------------------------------------------------------*/ 1105 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1106 MatGetRow_MPIDense, 1107 MatRestoreRow_MPIDense, 1108 MatMult_MPIDense, 1109 /* 4*/ MatMultAdd_MPIDense, 1110 MatMultTranspose_MPIDense, 1111 MatMultTransposeAdd_MPIDense, 1112 0, 1113 0, 1114 0, 1115 /* 10*/ 0, 1116 0, 1117 0, 1118 0, 1119 MatTranspose_MPIDense, 1120 /* 15*/ MatGetInfo_MPIDense, 1121 MatEqual_MPIDense, 1122 MatGetDiagonal_MPIDense, 1123 MatDiagonalScale_MPIDense, 1124 MatNorm_MPIDense, 1125 /* 20*/ MatAssemblyBegin_MPIDense, 1126 MatAssemblyEnd_MPIDense, 1127 MatSetOption_MPIDense, 1128 MatZeroEntries_MPIDense, 1129 /* 
24*/ MatZeroRows_MPIDense, 1130 0, 1131 0, 1132 0, 1133 0, 1134 /* 29*/ MatSetUp_MPIDense, 1135 0, 1136 0, 1137 0, 1138 0, 1139 /* 34*/ MatDuplicate_MPIDense, 1140 0, 1141 0, 1142 0, 1143 0, 1144 /* 39*/ MatAXPY_MPIDense, 1145 MatGetSubMatrices_MPIDense, 1146 0, 1147 MatGetValues_MPIDense, 1148 0, 1149 /* 44*/ 0, 1150 MatScale_MPIDense, 1151 MatShift_Basic, 1152 0, 1153 0, 1154 /* 49*/ MatSetRandom_MPIDense, 1155 0, 1156 0, 1157 0, 1158 0, 1159 /* 54*/ 0, 1160 0, 1161 0, 1162 0, 1163 0, 1164 /* 59*/ MatGetSubMatrix_MPIDense, 1165 MatDestroy_MPIDense, 1166 MatView_MPIDense, 1167 0, 1168 0, 1169 /* 64*/ 0, 1170 0, 1171 0, 1172 0, 1173 0, 1174 /* 69*/ 0, 1175 0, 1176 0, 1177 0, 1178 0, 1179 /* 74*/ 0, 1180 0, 1181 0, 1182 0, 1183 0, 1184 /* 79*/ 0, 1185 0, 1186 0, 1187 0, 1188 /* 83*/ MatLoad_MPIDense, 1189 0, 1190 0, 1191 0, 1192 0, 1193 0, 1194 /* 89*/ MatMatMult_MPIDense_MPIDense, 1195 MatMatMultSymbolic_MPIDense_MPIDense, 1196 MatMatMultNumeric_MPIDense, 1197 0, 1198 0, 1199 /* 94*/ 0, 1200 0, 1201 0, 1202 0, 1203 0, 1204 /* 99*/ 0, 1205 0, 1206 0, 1207 MatConjugate_MPIDense, 1208 0, 1209 /*104*/ 0, 1210 MatRealPart_MPIDense, 1211 MatImaginaryPart_MPIDense, 1212 0, 1213 0, 1214 /*109*/ 0, 1215 0, 1216 0, 1217 0, 1218 MatMissingDiagonal_MPIDense, 1219 /*114*/ 0, 1220 0, 1221 0, 1222 0, 1223 0, 1224 /*119*/ 0, 1225 0, 1226 0, 1227 0, 1228 0, 1229 /*124*/ 0, 1230 MatGetColumnNorms_MPIDense, 1231 0, 1232 0, 1233 0, 1234 /*129*/ 0, 1235 MatTransposeMatMult_MPIDense_MPIDense, 1236 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1237 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1238 0, 1239 /*134*/ 0, 1240 0, 1241 0, 1242 0, 1243 0, 1244 /*139*/ 0, 1245 0, 1246 0 1247 }; 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1251 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1252 { 1253 Mat_MPIDense *a; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 mat->preallocated = PETSC_TRUE; 1258 /* Note: For now, when data is specified above, this assumes the user correctly 1259 allocates the local dense storage space. We should add error checking. 
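   (Added note, believed accurate: when the caller does supply data, PETSc does not take
   ownership of the array; the caller must keep it valid while the matrix uses it and
   free it afterwards.)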
*/ 1260 1261 a = (Mat_MPIDense*)mat->data; 1262 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1263 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1264 a->nvec = mat->cmap->n; 1265 1266 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1267 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1268 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1269 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1270 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #if defined(PETSC_HAVE_ELEMENTAL) 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatConvert_MPIDense_Elemental" 1277 PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1278 { 1279 Mat mat_elemental; 1280 PetscErrorCode ierr; 1281 PetscScalar *array,*v_rowwise; 1282 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,j,k,*rows,*cols; 1283 1284 PetscFunctionBegin; 1285 ierr = PetscMalloc3(m*N,&v_rowwise,m,&rows,N,&cols);CHKERRQ(ierr); 1286 ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr); 1287 /* convert column-wise array into row-wise v_rowwise, see MatSetValues_Elemental() */ 1288 k = 0; 1289 for (j=0; j<N; j++) { 1290 cols[j] = j; 1291 for (i=0; i<m; i++) { 1292 v_rowwise[i*N+j] = array[k++]; 1293 } 1294 } 1295 for (i=0; i<m; i++) { 1296 rows[i] = rstart + i; 1297 } 1298 ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr); 1299 1300 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1301 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1302 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1303 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1304 1305 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1306 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v_rowwise,ADD_VALUES);CHKERRQ(ierr); 1307 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1308 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1309 ierr = PetscFree3(v_rowwise,rows,cols);CHKERRQ(ierr); 1310 1311 if (reuse == MAT_REUSE_MATRIX) { 1312 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1313 } else { 1314 *newmat = mat_elemental; 1315 } 1316 PetscFunctionReturn(0); 1317 } 1318 #endif 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatCreate_MPIDense" 1322 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1323 { 1324 Mat_MPIDense *a; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1329 mat->data = (void*)a; 1330 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1331 1332 mat->insertmode = NOT_SET_VALUES; 1333 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1334 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1335 1336 /* build cache for off array entries formed */ 1337 a->donotstash = PETSC_FALSE; 1338 1339 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1340 1341 /* stuff used for matrix vector multiply */ 1342 a->lvec = 0; 1343 a->Mvctx = 0; 1344 a->roworiented = PETSC_TRUE; 1345 1346 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1347 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
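/*
   Illustrative usage sketch (not part of the library source): creating a parallel
   dense matrix and letting PETSc allocate the local column-major storage.  The
   communicator and the 100 x 100 sizes below are hypothetical.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
     ... insert entries with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatCreateDense() below wraps this sequence, selecting MATSEQDENSE instead when
   the communicator has a single process.
*/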
#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a =
(Mat_MPIDense*)newmat->data; 1520 1521 PetscFunctionBegin; 1522 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1523 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1524 1525 /* determine ownership of rows and columns */ 1526 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1527 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1528 1529 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1530 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1531 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1532 } 1533 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1534 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1535 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1536 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1537 if (!rank) { 1538 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1539 1540 /* read in my part of the matrix numerical values */ 1541 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1542 1543 /* insert into matrix-by row (this is why cannot directly read into array */ 1544 vals_ptr = vals; 1545 for (i=0; i<m; i++) { 1546 for (j=0; j<N; j++) { 1547 array[i + j*m] = *vals_ptr++; 1548 } 1549 } 1550 1551 /* read in other processors and ship out */ 1552 for (i=1; i<size; i++) { 1553 nz = (rowners[i+1] - rowners[i])*N; 1554 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1555 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1556 } 1557 } else { 1558 /* receive numeric values */ 1559 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1560 1561 /* receive message of values*/ 1562 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1563 1564 /* insert into matrix-by row (this is why cannot directly read into array */ 1565 vals_ptr = vals; 1566 for (i=0; i<m; i++) { 1567 for (j=0; j<N; j++) { 1568 array[i + j*m] = *vals_ptr++; 1569 } 1570 } 1571 } 1572 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1573 ierr = PetscFree(vals);CHKERRQ(ierr); 1574 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1575 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatLoad_MPIDense" 1581 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1582 { 1583 Mat_MPIDense *a; 1584 PetscScalar *vals,*svals; 1585 MPI_Comm comm; 1586 MPI_Status status; 1587 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1588 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1589 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1590 PetscInt i,nz,j,rstart,rend; 1591 int fd; 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 /* force binary viewer to load .info file if it has not yet done so */ 1596 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1597 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1598 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1601 if (!rank) { 1602 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1603 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1604 } 1605 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1606 M = header[1]; N = header[2]; nz = header[3]; 1607 1608 /* If global rows/cols are set to 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts owned by the other processes and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
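
/*
   Example (a minimal sketch; the file name is hypothetical): loading a parallel dense
   matrix from a PETSc binary file.  MatLoad() dispatches to MatLoad_MPIDense() above
   when the matrix type is MATMPIDENSE.

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/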
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all atbarray contributions into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse of Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)Cdense->data;
  c->atbdense  = atb;
  atb->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
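
/*
   Example (a minimal sketch with hypothetical dense matrices A and B that share the same
   row layout): forming C = A^T * B through the symbolic/numeric pair above via the
   user-level interface.  The second call reuses the product matrix created by the first.

      Mat C;
      ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/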
#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat              Cnew;
  Mat_MPIDense     *c = (Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab = c->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);

  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,&Cnew);CHKERRQ(ierr);

  C->ops->destroy = ab->destroy; /* prevent the struct ab from being destroyed by MatHeaderReplace() */
  ierr = MatHeaderReplace(C,&Cnew);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  c          = (Mat_MPIDense*)C->data;
  c->abdense = ab;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  MPI_Comm         comm;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* Ae, Be, and Ce are kept in the reuse structure below and destroyed together with C */
  /* ierr = MatDestroy(&Ae);CHKERRQ(ierr); */
  /* ierr = MatDestroy(&Be);CHKERRQ(ierr); */
  /* ierr = MatDestroy(&Ce);CHKERRQ(ierr); */
  /* create data structure for reuse of Cdense */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  } else {
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
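
/*
   Example (a minimal sketch with hypothetical dense matrices A and B, where the column
   layout of A matches the row layout of B): forming C = A*B.  This path converts the
   operands to Elemental matrices, so it assumes PETSc was configured with Elemental
   support.

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/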