/*$Id: mpidense.c,v 1.128 1999/10/13 20:37:17 bsmith Exp bsmith $*/

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
                                         (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);

  if (isascii) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec);
*/

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but free data struct from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr  = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       MatMultAdd_MPIDense,
                                       MatMultTrans_MPIDense,
                                       MatMultTransAdd_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatTranspose_MPIDense,
                                       MatGetInfo_MPIDense,0,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       0,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       MatZeroRows_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatGetSize_MPIDense,
                                       MatGetLocalSize_MPIDense,
                                       MatGetOwnershipRange_MPIDense,
                                       0,
                                       0,
                                       MatGetArray_MPIDense,
                                       MatRestoreArray_MPIDense,
                                       MatDuplicate_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatGetSubMatrices_MPIDense,
                                       0,
                                       MatGetValues_MPIDense,
                                       0,
                                       0,
                                       MatScale_MPIDense,
                                       0,
                                       0,
                                       0,
                                       MatGetBlockSize_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatGetSubMatrix_MPIDense,
                                       0,
                                       0,
                                       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
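/*
   A minimal usage sketch (illustrative only, not part of the original source;
   the communicator, sizes, and value below are arbitrary, and errors are
   checked with the usual ierr/CHKERRQ convention):

       Mat    C;
       Scalar v   = 1.0;
       int    ierr,row = 0,col = 0;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&C);CHKERRQ(ierr);
       ierr = MatSetValues(C,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(C);CHKERRQ(ierr);
*/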
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each row stores all columns */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N;  /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIDense",
                                    (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;
  int        i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}