#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.68 1997/03/13 16:33:44 curfman Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {  /* must stash each entry separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}
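/*
   Illustrative sketch (added for exposition; not part of the original source):
   the value-array layout that MatSetValues_MPIDense above expects.  In the
   row-oriented case the routine reads v[i*n+j]; in the column-oriented case
   it reads v[i+j*m].  The guard macro MPIDENSE_USAGE_SKETCHES is hypothetical
   and simply keeps the sketch out of the build.
*/
#if defined(MPIDENSE_USAGE_SKETCHES)
static int MatSetValuesLayoutSketch_MPIDense(Mat mat)
{
  int    ierr, rows[2] = {0,1}, cols[2] = {0,1};
  Scalar vrow[4] = {1.0,2.0,3.0,4.0};   /* row-oriented:    a00 a01 a10 a11 */
  Scalar vcol[4] = {1.0,3.0,2.0,4.0};   /* column-oriented: a00 a10 a01 a11 */

  ierr = MatSetOption(mat,MAT_ROW_ORIENTED); CHKERRQ(ierr);
  ierr = MatSetValues(mat,2,rows,2,cols,vrow,INSERT_VALUES); CHKERRQ(ierr);

  ierr = MatSetOption(mat,MAT_COLUMN_ORIENTED); CHKERRQ(ierr);
  ierr = MatSetValues(mat,2,rows,2,cols,vcol,INSERT_VALUES); CHKERRQ(ierr);
  return 0;
}
#endif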
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= mdn->N) SETERRQ(1,0,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,0,"Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); we store the
          indices as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is, we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,0,"Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}
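/*
   Illustrative sketch (added; not part of the original source): the typical
   calling sequence that exercises the stash/assembly machinery above.  A value
   destined for a row owned by another processor is cached by
   MatSetValues_MPIDense and shipped during MatAssemblyBegin/End.  The guard
   macro MPIDENSE_USAGE_SKETCHES is hypothetical and keeps this out of the build.
*/
#if defined(MPIDENSE_USAGE_SKETCHES)
static int MatAssemblySketch_MPIDense(Mat A)
{
  int    ierr, row = 0, col = 0;   /* row 0 is owned by the first processor */
  Scalar value = 1.0;

  /* on every other processor this value is stashed, not stored directly */
  ierr = MatSetValues(A,1,&row,1,&col,&value,ADD_VALUES); CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
#endif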
#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

/* This code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access the local
   matrix storage directly rather than going through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); /* see note */
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
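/*
   Illustrative sketch (added; not part of the original source): how a caller
   might zero a set of global rows with MatZeroRows_MPIDense above.  The rows
   listed locally may be owned by any processor; the routine above routes them
   to their owners.  The guard macro MPIDENSE_USAGE_SKETCHES is hypothetical.
*/
#if defined(MPIDENSE_USAGE_SKETCHES)
static int MatZeroRowsSketch_MPIDense(Mat A)
{
  int    ierr, rows[2] = {0,1};
  Scalar diag = 1.0;        /* place 1.0 on the diagonal of the zeroed rows */
  IS     is;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag); CHKERRQ(ierr);
  ierr = ISDestroy(is); CHKERRQ(ierr);
  return 0;
}
#endif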
#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec) VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp) PetscFree(mdn->factor->temp);
    if (mdn->factor->tag) PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int     rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* the buffers are doubles, so reduce with MPI_DOUBLE */
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED)
    {a->roworiented = 0; MatSetOption(a->A,op);}
  else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows currently supported");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}
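/*
   Illustrative sketch (added; not part of the original source): iterating over
   the locally owned rows with the ownership-range and row routines above; this
   mirrors the copy loop used in MatView_MPIDense_ASCII.  The guard macro
   MPIDENSE_USAGE_SKETCHES is hypothetical.
*/
#if defined(MPIDENSE_USAGE_SKETCHES)
static int MatRowTraversalSketch_MPIDense(Mat A)
{
  int    ierr, row, rstart, rend, nz, *cols;
  Scalar *vals;

  ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
  for ( row=rstart; row<rend; row++ ) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
    /* ... examine the nz entries of this (dense) row ... */
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
  }
  return 0;
}
#endif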
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) { /* max column sum */
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose: we free A's data structures
       and replace them with B's */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       /* MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
       /* MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
       /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};
#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */
  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = 0;
  mat->mapping    = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}
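/*
   Illustrative sketch (added; not part of the original source): the two usual
   ways of specifying the sizes accepted by MatCreateMPIDense above, per the
   manual page.  The guard macro MPIDENSE_USAGE_SKETCHES is hypothetical.
*/
#if defined(MPIDENSE_USAGE_SKETCHES)
static int MatCreateSketch_MPIDense(MPI_Comm comm)
{
  Mat A, B;
  int ierr;

  /* give the global sizes; PETSc decides the local row distribution */
  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A); CHKERRQ(ierr);

  /* give the local row count; the global row count is the sum over processors */
  ierr = MatCreateMPIDense(comm,5,PETSC_DECIDE,PETSC_DECIDE,10,PETSC_NULL,&B); CHKERRQ(ierr);

  ierr = MatDestroy(A); CHKERRQ(ierr);
  ierr = MatDestroy(B); CHKERRQ(ierr);
  return 0;
}
#endif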
#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIDense"
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIDense;
  mat->view       = MatView_MPIDense;
  mat->factor     = A->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i, size, rank, m, rstart, rend, ierr, nz, j;
  Scalar     *array, *vals, *vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts belonging to other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into the matrix row by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
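/*
   Note added for exposition (not part of the original source): MatLoad_MPIDense
   below expects the PETSc binary layout
       int header[4] = {MAT_COOKIE, M, N, nz};
   followed, in the sparse case, by the M row lengths (int), the nz column
   indices (int), and the nz values (Scalar).  When nz is the special value
   MATRIX_BINARY_FORMAT_DENSE, the header is instead followed directly by the
   M*N values stored by rows, and MatLoad_MPIDense_DenseInFile above is used.
*/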
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j, rstart, rend, fd;
  Scalar       *vals, *svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4], rank, size, *rowlengths = 0, M, N, m, *rowners, maxnz, *cols;
  int          *ourlens, *sndcounts = 0, *procsnz = 0, *offlens, jj, *mycols, *smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts belonging to other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}