#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.70 1997/05/03 21:11:31 curfman Exp balay $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= mdn->N) SETERRQ(1,0,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}
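/*
   Illustrative sketch (not part of the original source): MatGetArray() on an
   MPIDense matrix returns the local m x N block, stored by columns (Fortran
   order), so local entry (i,j) lives at array[i + j*m].  A minimal access
   pattern, with m and N taken from MatGetLocalSize():

     Scalar *array;
     int    ierr, m, N, i, j;
     ierr = MatGetLocalSize(mat,&m,&N); CHKERRQ(ierr);
     ierr = MatGetArray(mat,&array); CHKERRQ(ierr);
     for ( j=0; j<N; j++ ) {
       for ( i=0; i<m; i++ ) {
         array[i + j*m] = 0.0;
       }
     }
     ierr = MatRestoreArray(mat,&array); CHKERRQ(ierr);
*/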
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,0,"Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
          (global row, global column, value); we store the indices as Scalars
          to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);
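/*
   Illustrative sketch (not part of the original source): the caller-level
   sequence that drives the stash/assembly code in this file.  Values destined
   for rows owned by other processes are cached in the stash by MatSetValues()
   and only communicated by the MatAssemblyBegin()/MatAssemblyEnd() pair.
   Assuming an existing square MPIDense matrix mat:

     int    ierr, rstart, rend, row, col;
     Scalar one = 1.0;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       col  = row;
       ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES); CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/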
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs = mdn->nrecvs, count = nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,0,"Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
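/*
   Illustrative sketch (not part of the original source): the MatZeroRows_MPIDense()
   routine that follows takes an index set of global row numbers (a process may
   list rows owned by other processes) and an optional value to place on the
   diagonal of the zeroed rows.  A minimal call, assuming an assembled matrix
   mat and an array rows[] of nlocal global row indices:

     IS     is;
     Scalar diag = 1.0;
     int    ierr;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(mat,is,&diag); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);
*/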
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); /* see note */
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}
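/*
   Illustrative sketch (not part of the original source): the multiply routines
   above first scatter the distributed input vector into the sequential work
   vector lvec, since each process stores all N columns of its local rows.  A
   minimal y = A*x, assuming mat is an assembled M x N MPIDense matrix created
   with the default (PETSC_DECIDE) row distribution so the vector layouts line
   up with the matrix:

     Vec x, y;
     int ierr;
     ierr = VecCreateMPI(comm,PETSC_DECIDE,N,&x); CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,PETSC_DECIDE,M,&y); CHKERRQ(ierr);
     ... fill x ...
     ierr = MatMult(mat,x,y); CHKERRQ(ierr);
*/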
#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}
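/*
   Illustrative note (not part of the original source): the local dense block
   aloc->v is stored by columns, so local entry (i,j) is aloc->v[i + j*m].
   The diagonal entry of global row rstart+i is local entry (i, rstart+i),
   hence the index

       radd + i*m + i  =  i + (rstart + i)*m        with radd = rstart*m,

   which is exactly what the loop above reads.
*/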
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec) VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp) PetscFree(mdn->factor->temp);
    if (mdn->factor->tag) PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int     rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,0,"only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}
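/*
   Illustrative sketch (not part of the original source): MatGetRow() on an
   MPIDense matrix only serves rows owned by the calling process, so a caller
   normally restricts itself to the ownership range (the copy loop in
   MatView_MPIDense_ASCII above follows the same pattern):

     int    ierr, rstart, rend, row, nz, *cols;
     Scalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
       ... use nz, cols, vals ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
     }
*/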
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but we free the data structures of a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}
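/*
   Illustrative sketch (not part of the original source): MatNorm() above
   reduces the locally computed contributions over the matrix communicator,
   while MatScale() simply scales the local block with the BLAS.  Minimal use,
   assuming an assembled MPIDense matrix mat:

     double nrm;
     Scalar alpha = 2.0;
     int    ierr;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm); CHKERRQ(ierr);
     ierr = MatScale(&alpha,mat); CHKERRQ(ierr);
*/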
static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       /* MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
       /* MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
       /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0,MatGetArray_MPIDense,MatRestoreArray_MPIDense,
       MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
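/*
   Illustrative example (not part of the original source): creating and filling
   a parallel dense matrix with the default row distribution; the 100 x 100
   size and the values are made up for illustration only.

     Mat    A;
     int    ierr, rstart, rend, row, col;
     Scalar v;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A); CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       for ( col=0; col<100; col++ ) {
         v    = 1.0/(row + col + 1);
         ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES); CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/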
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}
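/*
   Illustrative sketch (not part of the original source): MatConvertSameType_MPIDense
   below is reached through MatConvert() with the MATSAME type, which is the
   usual way to duplicate a matrix of the same format:

     Mat B;
     int ierr;
     ierr = MatConvert(A,MATSAME,&B); CHKERRQ(ierr);
     ...
     ierr = MatDestroy(B); CHKERRQ(ierr);
*/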
#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIDense"
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i, size, rank, m, rstart, rend, ierr, nz, j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
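/*
   Illustrative note (not part of the original source): the loader above expects
   the binary file to begin with a header of four ints (MAT_COOKIE, M, N, nz);
   when nz == MATRIX_BINARY_FORMAT_DENSE the M*N values follow immediately,
   stored row by row, and each process's chunk of rows is read on process 0,
   shipped out, and transposed into the column-major local array.  A caller
   reaches this code through the generic loader, given a binary viewer opened
   elsewhere:

     Mat A;
     int ierr;
     ierr = MatLoad(viewer,MATMPIDENSE,&A); CHKERRQ(ierr);
*/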
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}