1 /*$Id: mpidense.c,v 1.149 2001/01/19 23:20:28 balay Exp bsmith $*/ 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" 8 #include "src/vec/vecimpl.h" 9 10 EXTERN_C_BEGIN 11 #undef __FUNC__ 12 #define __FUNC__ "MatGetDiagonalBlock_MPIDense" 13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 14 { 15 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 16 int m = A->m,rstart = mdn->rstart,rank,ierr; 17 Scalar *array; 18 MPI_Comm comm; 19 20 PetscFunctionBegin; 21 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 22 23 /* The reuse aspect is not implemented efficiently */ 24 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 25 26 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 27 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 28 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 29 ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr); 30 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 31 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 32 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 33 34 *iscopy = PETSC_TRUE; 35 PetscFunctionReturn(0); 36 } 37 EXTERN_C_END 38 39 EXTERN int MatSetUpMultiply_MPIDense(Mat); 40 41 #undef __FUNC__ 42 #define __FUNC__ "MatSetValues_MPIDense" 43 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv) 44 { 45 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 46 int ierr,i,j,rstart = A->rstart,rend = A->rend,row; 47 PetscTruth roworiented = A->roworiented; 48 49 PetscFunctionBegin; 50 for (i=0; i<m; i++) { 51 if (idxm[i] < 0) continue; 52 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 53 if (idxm[i] >= rstart && idxm[i] < rend) { 54 row = idxm[i] - rstart; 55 if (roworiented) { 56 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 57 } else { 58 for (j=0; j<n; j++) { 59 if (idxn[j] < 0) 
continue; 60 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 61 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 62 } 63 } 64 } else { 65 if (!A->donotstash) { 66 if (roworiented) { 67 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 68 } else { 69 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 70 } 71 } 72 } 73 } 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNC__ 78 #define __FUNC__ "MatGetValues_MPIDense" 79 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 80 { 81 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 82 int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row; 83 84 PetscFunctionBegin; 85 for (i=0; i<m; i++) { 86 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 87 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 88 if (idxm[i] >= rstart && idxm[i] < rend) { 89 row = idxm[i] - rstart; 90 for (j=0; j<n; j++) { 91 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 92 if (idxn[j] >= mat->N) { 93 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 94 } 95 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 96 } 97 } else { 98 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 99 } 100 } 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNC__ 105 #define __FUNC__ "MatGetArray_MPIDense" 106 int MatGetArray_MPIDense(Mat A,Scalar **array) 107 { 108 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 109 int ierr; 110 111 PetscFunctionBegin; 112 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNC__ 117 #define __FUNC__ "MatGetSubMatrix_MPIDense" 118 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 119 { 120 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 121 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 122 int 
i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank; 123 Scalar *av,*bv,*v = lmat->v; 124 Mat newmat; 125 126 PetscFunctionBegin; 127 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 128 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 129 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 130 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 131 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 132 133 /* No parallel redistribution currently supported! Should really check each index set 134 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 135 original matrix! */ 136 137 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 138 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 139 140 /* Check submatrix call */ 141 if (scall == MAT_REUSE_MATRIX) { 142 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 143 /* Really need to test rows and column sizes! */ 144 newmat = *B; 145 } else { 146 /* Create and fill new matrix */ 147 ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr); 148 } 149 150 /* Now extract the data pointers and do the copy, column at a time */ 151 newmatd = (Mat_MPIDense*)newmat->data; 152 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 153 154 for (i=0; i<ncols; i++) { 155 av = v + nlrows*icol[i]; 156 for (j=0; j<nrows; j++) { 157 *bv++ = av[irow[j] - rstart]; 158 } 159 } 160 161 /* Assemble the matrices so that the correct flags are set */ 162 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 163 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 164 165 /* Free work space */ 166 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 167 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 168 *B = newmat; 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNC__ 173 #define __FUNC__ "MatRestoreArray_MPIDense" 174 int MatRestoreArray_MPIDense(Mat A,Scalar **array) 175 { 176 PetscFunctionBegin; 177 
PetscFunctionReturn(0); 178 } 179 180 #undef __FUNC__ 181 #define __FUNC__ "MatAssemblyBegin_MPIDense" 182 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 183 { 184 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 185 MPI_Comm comm = mat->comm; 186 int ierr,nstash,reallocs; 187 InsertMode addv; 188 189 PetscFunctionBegin; 190 /* make sure all processors are either in INSERTMODE or ADDMODE */ 191 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 192 if (addv == (ADD_VALUES|INSERT_VALUES)) { 193 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 194 } 195 mat->insertmode = addv; /* in case this processor had no cache */ 196 197 ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 198 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 199 PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 200 PetscFunctionReturn(0); 201 } 202 203 #undef __FUNC__ 204 #define __FUNC__ "MatAssemblyEnd_MPIDense" 205 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 206 { 207 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 208 int i,n,ierr,*row,*col,flg,j,rstart,ncols; 209 Scalar *val; 210 InsertMode addv=mat->insertmode; 211 212 PetscFunctionBegin; 213 /* wait on receives */ 214 while (1) { 215 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 216 if (!flg) break; 217 218 for (i=0; i<n;) { 219 /* Now identify the consecutive vals belonging to the same row */ 220 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 221 if (j < n) ncols = j-i; 222 else ncols = n-i; 223 /* Now assemble all these values with a single function call */ 224 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 225 i = j; 226 } 227 } 228 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 229 230 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 
231 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 232 233 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 234 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 235 } 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNC__ 240 #define __FUNC__ "MatZeroEntries_MPIDense" 241 int MatZeroEntries_MPIDense(Mat A) 242 { 243 int ierr; 244 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 245 246 PetscFunctionBegin; 247 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 248 PetscFunctionReturn(0); 249 } 250 251 #undef __FUNC__ 252 #define __FUNC__ "MatGetBlockSize_MPIDense" 253 int MatGetBlockSize_MPIDense(Mat A,int *bs) 254 { 255 PetscFunctionBegin; 256 *bs = 1; 257 PetscFunctionReturn(0); 258 } 259 260 /* the code does not do the diagonal entries correctly unless the 261 matrix is square and the column and row owerships are identical. 262 This is a BUG. The only way to fix it seems to be to access 263 mdn->A and mdn->B directly and not through the MatZeroRows() 264 routine. 265 */ 266 #undef __FUNC__ 267 #define __FUNC__ "MatZeroRows_MPIDense" 268 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 269 { 270 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 271 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 272 int *procs,*nprocs,j,idx,nsends,*work; 273 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 274 int *rvalues,tag = A->tag,count,base,slen,n,*source; 275 int *lens,imdex,*lrows,*values; 276 MPI_Comm comm = A->comm; 277 MPI_Request *send_waits,*recv_waits; 278 MPI_Status recv_status,*send_status; 279 IS istmp; 280 PetscTruth found; 281 282 PetscFunctionBegin; 283 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 284 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 285 286 /* first count number of contributors to each processor */ 287 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 288 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 289 procs = nprocs + size; 290 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 291 
for (i=0; i<N; i++) { 292 idx = rows[i]; 293 found = PETSC_FALSE; 294 for (j=0; j<size; j++) { 295 if (idx >= owners[j] && idx < owners[j+1]) { 296 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 297 } 298 } 299 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 300 } 301 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 302 303 /* inform other processors of number of messages and max length*/ 304 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 305 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 306 nmax = work[rank]; 307 nrecvs = work[size+rank]; 308 ierr = PetscFree(work);CHKERRQ(ierr); 309 310 /* post receives: */ 311 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 312 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 313 for (i=0; i<nrecvs; i++) { 314 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 315 } 316 317 /* do sends: 318 1) starts[i] gives the starting index in svalues for stuff going to 319 the ith processor 320 */ 321 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 322 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 323 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 324 starts[0] = 0; 325 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 326 for (i=0; i<N; i++) { 327 svalues[starts[owner[i]]++] = rows[i]; 328 } 329 ISRestoreIndices(is,&rows); 330 331 starts[0] = 0; 332 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 333 count = 0; 334 for (i=0; i<size; i++) { 335 if (procs[i]) { 336 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 337 } 338 } 339 ierr = PetscFree(starts);CHKERRQ(ierr); 340 341 base = owners[rank]; 342 343 /* wait on receives */ 344 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 
345 source = lens + nrecvs; 346 count = nrecvs; slen = 0; 347 while (count) { 348 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 349 /* unpack receives into our local space */ 350 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 351 source[imdex] = recv_status.MPI_SOURCE; 352 lens[imdex] = n; 353 slen += n; 354 count--; 355 } 356 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 357 358 /* move the data into the send scatter */ 359 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 360 count = 0; 361 for (i=0; i<nrecvs; i++) { 362 values = rvalues + i*nmax; 363 for (j=0; j<lens[i]; j++) { 364 lrows[count++] = values[j] - base; 365 } 366 } 367 ierr = PetscFree(rvalues);CHKERRQ(ierr); 368 ierr = PetscFree(lens);CHKERRQ(ierr); 369 ierr = PetscFree(owner);CHKERRQ(ierr); 370 ierr = PetscFree(nprocs);CHKERRQ(ierr); 371 372 /* actually zap the local rows */ 373 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 374 PetscLogObjectParent(A,istmp); 375 ierr = PetscFree(lrows);CHKERRQ(ierr); 376 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 377 ierr = ISDestroy(istmp);CHKERRQ(ierr); 378 379 /* wait on sends */ 380 if (nsends) { 381 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 382 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 383 ierr = PetscFree(send_status);CHKERRQ(ierr); 384 } 385 ierr = PetscFree(send_waits);CHKERRQ(ierr); 386 ierr = PetscFree(svalues);CHKERRQ(ierr); 387 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNC__ 392 #define __FUNC__ "MatMult_MPIDense" 393 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 394 { 395 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 396 int ierr; 397 398 PetscFunctionBegin; 399 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 400 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 401 ierr = 
MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNC__ 406 #define __FUNC__ "MatMultAdd_MPIDense" 407 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 408 { 409 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 410 int ierr; 411 412 PetscFunctionBegin; 413 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 414 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 415 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNC__ 420 #define __FUNC__ "MatMultTranspose_MPIDense" 421 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 422 { 423 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 424 int ierr; 425 Scalar zero = 0.0; 426 427 PetscFunctionBegin; 428 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 429 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 430 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 431 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 435 #undef __FUNC__ 436 #define __FUNC__ "MatMultTransposeAdd_MPIDense" 437 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 438 { 439 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 440 int ierr; 441 442 PetscFunctionBegin; 443 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 444 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 445 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 446 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNC__ 451 #define __FUNC__ "MatGetDiagonal_MPIDense" 452 int MatGetDiagonal_MPIDense(Mat A,Vec v) 453 { 454 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 455 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 456 int ierr,len,i,n,m = A->m,radd; 457 Scalar 
*x,zero = 0.0; 458 459 PetscFunctionBegin; 460 ierr = VecSet(&zero,v);CHKERRQ(ierr); 461 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 462 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 463 if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 464 len = PetscMin(a->A->m,a->A->n); 465 radd = a->rstart*m; 466 for (i=0; i<len; i++) { 467 x[i] = aloc->v[radd + i*m + i]; 468 } 469 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNC__ 474 #define __FUNC__ "MatDestroy_MPIDense" 475 int MatDestroy_MPIDense(Mat mat) 476 { 477 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 478 int ierr; 479 480 PetscFunctionBegin; 481 482 #if defined(PETSC_USE_LOG) 483 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 484 #endif 485 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 486 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 487 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 488 if (mdn->lvec) VecDestroy(mdn->lvec); 489 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 490 if (mdn->factor) { 491 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 492 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 493 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 494 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 495 } 496 ierr = PetscFree(mdn);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNC__ 501 #define __FUNC__ "MatView_MPIDense_Binary" 502 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 503 { 504 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 505 int ierr; 506 507 PetscFunctionBegin; 508 if (mdn->size == 1) { 509 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 510 } 511 else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported"); 512 PetscFunctionReturn(0); 513 } 514 515 #undef __FUNC__ 516 #define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket" 517 static int MatView_MPIDense_ASCIIorDraworSocket(Mat 
mat,PetscViewer viewer) 518 { 519 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 520 int ierr,size = mdn->size,rank = mdn->rank; 521 PetscViewerType vtype; 522 PetscTruth isascii,isdraw; 523 PetscViewer sviewer; 524 PetscViewerFormat format; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 529 if (isascii) { 530 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 531 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 532 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 533 MatInfo info; 534 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 535 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m, 536 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 537 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 538 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } else if (format == PETSC_VIEWER_ASCII_INFO) { 541 PetscFunctionReturn(0); 542 } 543 } else if (isdraw) { 544 PetscDraw draw; 545 PetscTruth isnull; 546 547 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 548 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 549 if (isnull) PetscFunctionReturn(0); 550 } 551 552 if (size == 1) { 553 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 554 } else { 555 /* assemble the entire matrix onto first processor. */ 556 Mat A; 557 int M = mat->M,N = mat->N,m,row,i,nz,*cols; 558 Scalar *vals; 559 560 if (!rank) { 561 ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 562 } else { 563 ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 564 } 565 PetscLogObjectParent(mat,A); 566 567 /* Copy the matrix ... 
This isn't the most efficient means, 568 but it's quick for now */ 569 row = mdn->rstart; m = mdn->A->m; 570 for (i=0; i<m; i++) { 571 ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 572 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 573 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 574 row++; 575 } 576 577 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 578 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 579 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 580 if (!rank) { 581 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 582 } 583 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 584 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 585 ierr = MatDestroy(A);CHKERRQ(ierr); 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNC__ 591 #define __FUNC__ "MatView_MPIDense" 592 int MatView_MPIDense(Mat mat,PetscViewer viewer) 593 { 594 int ierr; 595 PetscTruth isascii,isbinary,isdraw,issocket; 596 597 PetscFunctionBegin; 598 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 601 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 602 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 603 604 if (isascii || issocket || isdraw) { 605 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 606 } else if (isbinary) { 607 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 608 } else { 609 SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 610 } 611 PetscFunctionReturn(0); 612 } 613 614 #undef __FUNC__ 615 #define __FUNC__ "MatGetInfo_MPIDense" 616 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 617 { 618 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 619 Mat mdn = 
mat->A; 620 int ierr; 621 PetscReal isend[5],irecv[5]; 622 623 PetscFunctionBegin; 624 info->rows_global = (double)A->M; 625 info->columns_global = (double)A->N; 626 info->rows_local = (double)A->m; 627 info->columns_local = (double)A->N; 628 info->block_size = 1.0; 629 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 630 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 631 isend[3] = info->memory; isend[4] = info->mallocs; 632 if (flag == MAT_LOCAL) { 633 info->nz_used = isend[0]; 634 info->nz_allocated = isend[1]; 635 info->nz_unneeded = isend[2]; 636 info->memory = isend[3]; 637 info->mallocs = isend[4]; 638 } else if (flag == MAT_GLOBAL_MAX) { 639 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 640 info->nz_used = irecv[0]; 641 info->nz_allocated = irecv[1]; 642 info->nz_unneeded = irecv[2]; 643 info->memory = irecv[3]; 644 info->mallocs = irecv[4]; 645 } else if (flag == MAT_GLOBAL_SUM) { 646 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 647 info->nz_used = irecv[0]; 648 info->nz_allocated = irecv[1]; 649 info->nz_unneeded = irecv[2]; 650 info->memory = irecv[3]; 651 info->mallocs = irecv[4]; 652 } 653 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 654 info->fill_ratio_needed = 0; 655 info->factor_mallocs = 0; 656 PetscFunctionReturn(0); 657 } 658 659 #undef __FUNC__ 660 #define __FUNC__ "MatSetOption_MPIDense" 661 int MatSetOption_MPIDense(Mat A,MatOption op) 662 { 663 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 664 int ierr; 665 666 PetscFunctionBegin; 667 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 668 op == MAT_YES_NEW_NONZERO_LOCATIONS || 669 op == MAT_NEW_NONZERO_LOCATION_ERR || 670 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 671 op == MAT_COLUMNS_SORTED || 672 op == MAT_COLUMNS_UNSORTED) { 673 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 674 } else if (op == MAT_ROW_ORIENTED) { 675 a->roworiented = PETSC_TRUE; 676 ierr = 
MatSetOption(a->A,op);CHKERRQ(ierr); 677 } else if (op == MAT_ROWS_SORTED || 678 op == MAT_ROWS_UNSORTED || 679 op == MAT_SYMMETRIC || 680 op == MAT_STRUCTURALLY_SYMMETRIC || 681 op == MAT_YES_NEW_DIAGONALS || 682 op == MAT_USE_HASH_TABLE) { 683 PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 684 } else if (op == MAT_COLUMN_ORIENTED) { 685 a->roworiented = PETSC_FALSE; 686 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 687 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 688 a->donotstash = PETSC_TRUE; 689 } else if (op == MAT_NO_NEW_DIAGONALS) { 690 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 691 } else { 692 SETERRQ(PETSC_ERR_SUP,"unknown option"); 693 } 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNC__ 698 #define __FUNC__ "MatGetOwnershipRange_MPIDense" 699 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 700 { 701 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 702 703 PetscFunctionBegin; 704 if (m) *m = mat->rstart; 705 if (n) *n = mat->rend; 706 PetscFunctionReturn(0); 707 } 708 709 #undef __FUNC__ 710 #define __FUNC__ "MatGetRow_MPIDense" 711 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 712 { 713 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 714 int lrow,rstart = mat->rstart,rend = mat->rend,ierr; 715 716 PetscFunctionBegin; 717 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 718 lrow = row - rstart; 719 ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNC__ 724 #define __FUNC__ "MatRestoreRow_MPIDense" 725 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 726 { 727 int ierr; 728 729 PetscFunctionBegin; 730 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 731 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 732 PetscFunctionReturn(0); 733 } 734 735 #undef __FUNC__ 736 #define __FUNC__ "MatDiagonalScale_MPIDense" 737 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 738 { 739 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 
740 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 741 Scalar *l,*r,x,*v; 742 int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n; 743 744 PetscFunctionBegin; 745 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 746 if (ll) { 747 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 748 if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size"); 749 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 750 for (i=0; i<m; i++) { 751 x = l[i]; 752 v = mat->v + i; 753 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 754 } 755 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 756 PetscLogFlops(n*m); 757 } 758 if (rr) { 759 ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 760 if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size"); 761 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 762 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 763 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 764 for (i=0; i<n; i++) { 765 x = r[i]; 766 v = mat->v + i*m; 767 for (j=0; j<m; j++) { (*v++) *= x;} 768 } 769 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 770 PetscLogFlops(n*m); 771 } 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNC__ 776 #define __FUNC__ "MatNorm_MPIDense" 777 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm) 778 { 779 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 780 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 781 int ierr,i,j; 782 PetscReal sum = 0.0; 783 Scalar *v = mat->v; 784 785 PetscFunctionBegin; 786 if (mdn->size == 1) { 787 ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr); 788 } else { 789 if (type == NORM_FROBENIUS) { 790 for (i=0; i<mdn->A->n*mdn->A->m; i++) { 791 #if defined(PETSC_USE_COMPLEX) 792 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 793 #else 794 sum += (*v)*(*v); v++; 795 #endif 796 } 797 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 798 *norm = sqrt(*norm); 799 
PetscLogFlops(2*mdn->A->n*mdn->A->m); 800 } else if (type == NORM_1) { 801 PetscReal *tmp,*tmp2; 802 ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 803 tmp2 = tmp + A->N; 804 ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr); 805 *norm = 0.0; 806 v = mat->v; 807 for (j=0; j<mdn->A->n; j++) { 808 for (i=0; i<mdn->A->m; i++) { 809 tmp[j] += PetscAbsScalar(*v); v++; 810 } 811 } 812 ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 813 for (j=0; j<A->N; j++) { 814 if (tmp2[j] > *norm) *norm = tmp2[j]; 815 } 816 ierr = PetscFree(tmp);CHKERRQ(ierr); 817 PetscLogFlops(A->n*A->m); 818 } else if (type == NORM_INFINITY) { /* max row norm */ 819 PetscReal ntemp; 820 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 821 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 822 } else { 823 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 824 } 825 } 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNC__ 830 #define __FUNC__ "MatTranspose_MPIDense" 831 int MatTranspose_MPIDense(Mat A,Mat *matout) 832 { 833 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 834 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 835 Mat B; 836 int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart; 837 int j,i,ierr; 838 Scalar *v; 839 840 PetscFunctionBegin; 841 if (!matout && M != N) { 842 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 843 } 844 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 845 846 m = a->A->m; n = a->A->n; v = Aloc->v; 847 ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr); 848 for (j=0; j<n; j++) { 849 for (i=0; i<m; i++) rwork[i] = rstart + i; 850 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 851 v += m; 852 } 853 ierr = PetscFree(rwork);CHKERRQ(ierr); 854 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 855 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 if (matout) { 857 *matout = B; 858 } 
else { 859 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #include "petscblaslapack.h" 865 #undef __FUNC__ 866 #define __FUNC__ "MatScale_MPIDense" 867 int MatScale_MPIDense(Scalar *alpha,Mat inA) 868 { 869 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 870 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 871 int one = 1,nz; 872 873 PetscFunctionBegin; 874 nz = inA->m*inA->N; 875 BLscal_(&nz,alpha,a->v,&one); 876 PetscLogFlops(nz); 877 PetscFunctionReturn(0); 878 } 879 880 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 881 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **); 882 883 #undef __FUNC__ 884 #define __FUNC__ "MatSetUpPreallocation_MPIDense" 885 int MatSetUpPreallocation_MPIDense(Mat A) 886 { 887 int ierr; 888 889 PetscFunctionBegin; 890 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 894 /* -------------------------------------------------------------------*/ 895 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 896 MatGetRow_MPIDense, 897 MatRestoreRow_MPIDense, 898 MatMult_MPIDense, 899 MatMultAdd_MPIDense, 900 MatMultTranspose_MPIDense, 901 MatMultTransposeAdd_MPIDense, 902 0, 903 0, 904 0, 905 0, 906 0, 907 0, 908 0, 909 MatTranspose_MPIDense, 910 MatGetInfo_MPIDense,0, 911 MatGetDiagonal_MPIDense, 912 MatDiagonalScale_MPIDense, 913 MatNorm_MPIDense, 914 MatAssemblyBegin_MPIDense, 915 MatAssemblyEnd_MPIDense, 916 0, 917 MatSetOption_MPIDense, 918 MatZeroEntries_MPIDense, 919 MatZeroRows_MPIDense, 920 0, 921 0, 922 0, 923 0, 924 MatSetUpPreallocation_MPIDense, 925 0, 926 MatGetOwnershipRange_MPIDense, 927 0, 928 0, 929 MatGetArray_MPIDense, 930 MatRestoreArray_MPIDense, 931 MatDuplicate_MPIDense, 932 0, 933 0, 934 0, 935 0, 936 0, 937 MatGetSubMatrices_MPIDense, 938 0, 939 MatGetValues_MPIDense, 940 0, 941 0, 942 MatScale_MPIDense, 943 0, 944 0, 945 0, 946 MatGetBlockSize_MPIDense, 947 0, 948 0, 949 0, 950 0, 951 0, 952 0, 
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatCreate_MPIDense"
/*
   MatCreate_MPIDense - Constructor for the parallel dense matrix type.
   Installs the MATMPIDENSE operations table, splits row/column ownership
   across the communicator, builds the rowners/cowners ownership tables,
   creates the stash used for off-process MatSetValues(), and composes the
   MatGetDiagonalBlock_C function on the object.
*/
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  /* fill in any PETSC_DECIDE local/global sizes consistently across the communicator */
  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners and cowners share a
     single allocation of 2*(size+2) ints, with cowners starting size+1 ints in */
  ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  /* gather every process's local row count, then prefix-sum into ownership offsets:
     rowners[p]..rowners[p+1] is the global row range owned by process p */
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  /* same prefix-sum construction for the column ownership table */
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* expose the local diagonal block extractor (see MatGetDiagonalBlock_MPIDense above) */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
.  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  /* silently ignore matrices that are not of type MATMPIDENSE */
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking.
*/ 1061 1062 a = (Mat_MPIDense*)mat->data; 1063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr); 1064 PetscLogObjectParent(mat,a->A); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNC__ 1069 #define __FUNC__ "MatCreateMPIDense" 1070 /*@C 1071 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1072 1073 Collective on MPI_Comm 1074 1075 Input Parameters: 1076 + comm - MPI communicator 1077 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1078 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1079 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1080 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1081 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1082 to control all matrix memory allocation. 1083 1084 Output Parameter: 1085 . A - the matrix 1086 1087 Notes: 1088 The dense format is fully compatible with standard Fortran 77 1089 storage by columns. 1090 1091 The data input variable is intended primarily for Fortran programmers 1092 who wish to allocate their own matrix memory space. Most users should 1093 set data=PETSC_NULL. 1094 1095 The user MUST specify either the local or global matrix dimensions 1096 (possibly both). 
1097 1098 Level: intermediate 1099 1100 .keywords: matrix,dense, parallel 1101 1102 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1103 @*/ 1104 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A) 1105 { 1106 int ierr,size; 1107 1108 PetscFunctionBegin; 1109 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1111 if (size > 1) { 1112 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1113 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1114 } else { 1115 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1116 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNC__ 1122 #define __FUNC__ "MatDuplicate_MPIDense" 1123 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1124 { 1125 Mat mat; 1126 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1127 int ierr; 1128 1129 PetscFunctionBegin; 1130 *newmat = 0; 1131 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr); 1132 ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr); 1133 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1134 mat->data = (void*)a; 1135 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1136 mat->factor = A->factor; 1137 mat->assembled = PETSC_TRUE; 1138 mat->preallocated = PETSC_TRUE; 1139 1140 a->rstart = oldmat->rstart; 1141 a->rend = oldmat->rend; 1142 a->size = oldmat->size; 1143 a->rank = oldmat->rank; 1144 mat->insertmode = NOT_SET_VALUES; 1145 a->nvec = oldmat->nvec; 1146 a->donotstash = oldmat->donotstash; 1147 ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1148 PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1149 ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 1150 ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 1151 1152 ierr = 
MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1153 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1154 PetscLogObjectParent(mat,a->A); 1155 *newmat = mat; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #include "petscsys.h" 1160 1161 #undef __FUNC__ 1162 #define __FUNC__ "MatLoad_MPIDense_DenseInFile" 1163 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat) 1164 { 1165 int *rowners,i,size,rank,m,ierr,nz,j; 1166 Scalar *array,*vals,*vals_ptr; 1167 MPI_Status status; 1168 1169 PetscFunctionBegin; 1170 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1171 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1172 1173 /* determine ownership of all rows */ 1174 m = M/size + ((M % size) > rank); 1175 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1176 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1177 rowners[0] = 0; 1178 for (i=2; i<=size; i++) { 1179 rowners[i] += rowners[i-1]; 1180 } 1181 1182 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1183 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1184 1185 if (!rank) { 1186 ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr); 1187 1188 /* read in my part of the matrix numerical values */ 1189 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1190 1191 /* insert into matrix-by row (this is why cannot directly read into array */ 1192 vals_ptr = vals; 1193 for (i=0; i<m; i++) { 1194 for (j=0; j<N; j++) { 1195 array[i + j*m] = *vals_ptr++; 1196 } 1197 } 1198 1199 /* read in other processors and ship out */ 1200 for (i=1; i<size; i++) { 1201 nz = (rowners[i+1] - rowners[i])*N; 1202 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1203 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 1204 } 1205 } else { 1206 /* receive numeric values */ 1207 ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr); 1208 1209 /* receive message of values*/ 1210 ierr = 
MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 1211 1212 /* insert into matrix-by row (this is why cannot directly read into array */ 1213 vals_ptr = vals; 1214 for (i=0; i<m; i++) { 1215 for (j=0; j<N; j++) { 1216 array[i + j*m] = *vals_ptr++; 1217 } 1218 } 1219 } 1220 ierr = PetscFree(rowners);CHKERRQ(ierr); 1221 ierr = PetscFree(vals);CHKERRQ(ierr); 1222 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1223 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 EXTERN_C_BEGIN 1228 #undef __FUNC__ 1229 #define __FUNC__ "MatLoad_MPIDense" 1230 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat) 1231 { 1232 Mat A; 1233 Scalar *vals,*svals; 1234 MPI_Comm comm = ((PetscObject)viewer)->comm; 1235 MPI_Status status; 1236 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1237 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1238 int tag = ((PetscObject)viewer)->tag; 1239 int i,nz,ierr,j,rstart,rend,fd; 1240 1241 PetscFunctionBegin; 1242 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1243 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1244 if (!rank) { 1245 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1246 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1247 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1248 } 1249 1250 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1251 M = header[1]; N = header[2]; nz = header[3]; 1252 1253 /* 1254 Handle case where matrix is stored on disk as a dense matrix 1255 */ 1256 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1257 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 /* determine ownership of all rows */ 1262 m = M/size + ((M % size) > rank); 1263 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1264 ierr = 
MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* prefix-sum the gathered local row counts into ownership offsets */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* non-root ranks: send buffer/count/displ arguments are ignored by MPI_Scatterv */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity check: the received count must match the scattered row lengths */
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* NOTE(review): the subtract/re-add of offlens around the creation looks
     vestigial (shared with the sparse MPIAIJ loader); the dense creation
     below ignores per-row lengths — confirm before removing */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix: one MatSetValues() call per local row, walking
       the column-index and value buffers in step */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END