#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.65 1997/03/09 17:40:01 curfman Exp curfman $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each entry separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}
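/*
   Illustrative sketch (not part of the library source): the value array v
   passed to MatSetValues_MPIDense above is interpreted according to the
   roworiented flag.  For a 2 x 3 logical block with rows {r0,r1} and
   columns {c0,c1,c2},

     row oriented (roworiented == 1):     v = { a(r0,c0), a(r0,c1), a(r0,c2),
                                                a(r1,c0), a(r1,c1), a(r1,c2) }

     column oriented (roworiented == 0):  v = { a(r0,c0), a(r1,c0),
                                                a(r0,c1), a(r1,c1),
                                                a(r0,c2), a(r1,c2) }

   so entry (i,j) is read from v[i*n+j] in the first case and from v[i+j*m]
   in the second, matching the pointer arithmetic used above.
*/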
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= mdn->N) SETERRQ(1,0,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,0,"Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
     1) each message will consist of ordered triples
        (global row index, global column index, value); the indices are
        stored as Scalars to simplify the message passing.
     2) since we don't know how long each individual message is, we
        allocate the largest needed buffer for each receive.  Potentially
        this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar)); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
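/*
   Illustrative sketch (not part of the library source): the send buffer built
   in MatAssemblyBegin_MPIDense above packs one triple of Scalars per stashed
   entry, so a message carrying the entries (2,7) = alpha and (3,1) = beta
   looks like

       svalues = { 2.0, 7.0, alpha,  3.0, 1.0, beta }

   and the receiver in MatAssemblyEnd_MPIDense recovers the integer indices by
   casting the real parts back to int.
*/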
extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,0,"Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
static int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
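/*
   Usage sketch (illustrative only, not part of the library source): a typical
   assembly sequence from user code.  Values set for rows owned by another
   processor are stashed by MatSetValues_MPIDense and shipped out by the
   MatAssemblyBegin/End pair above.

     Mat    A;
     int    ierr, row = 5, cols[2] = {0, 1};
     Scalar vals[2] = {1.0, 2.0};

     ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A); CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/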
/*
   The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* convert the received global row numbers into a local list of rows */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
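/*
   Usage sketch (illustrative only, not part of the library source): zeroing a
   set of global rows and placing 1.0 on the diagonal of each zeroed row.

     IS     is;
     int    ierr, rows[2] = {0, 7};
     Scalar one = 1.0;

     ierr = ISCreateGeneral(MPI_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);
*/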
#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x); CHKERRQ(ierr);
  return 0;
}
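/*
   Usage sketch (illustrative only, not part of the library source): the
   multiply routines above expect x and y to be distributed compatibly with
   the matrix layout, e.g. for y = A*x with A of global size M x N:

     Vec x, y;
     int ierr;

     ierr = VecCreateMPI(MPI_COMM_WORLD,PETSC_DECIDE,N,&x); CHKERRQ(ierr);
     ierr = VecCreateMPI(MPI_COMM_WORLD,PETSC_DECIDE,M,&y); CHKERRQ(ierr);
     ierr = MatMult(A,x,y); CHKERRQ(ierr);

   Internally x is first gathered into the sequential work vector lvec by the
   Mvctx scatter, and the product is then formed with the local dense block.
*/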
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec) VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp) PetscFree(mdn->factor->temp);
    if (mdn->factor->tag) PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int     rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
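/*
   Usage sketch (illustrative only, not part of the library source): the viewer
   routines above are reached through the generic interface, e.g.

     ierr = MatView(A,VIEWER_STDOUT_WORLD); CHKERRQ(ierr);

   With an ASCII viewer whose format is VIEWER_FORMAT_ASCII_INFO_LONG only the
   per-processor size/storage summary is printed; with an ASCII_FILE_VIEWER
   each processor prints its own local block, and otherwise the matrix is
   first assembled onto the first processor and printed there.
*/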
#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  return 0;
}
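/*
   Usage sketch (illustrative only, not part of the library source): switching
   the matrix to column-oriented MatSetValues input, which clears the
   roworiented flag consulted by MatSetValues_MPIDense at the top of this
   file.

     ierr = MatSetOption(A,MAT_COLUMN_ORIENTED); CHKERRQ(ierr);
*/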
#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows supported");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) { /* max column sum */
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row sum */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,0,"No support for the two norm");
    }
  }
  return 0;
}
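/*
   Usage sketch (illustrative only, not part of the library source): the usual
   idiom for visiting the locally owned rows through the routines above.

     int    ierr, row, rstart, rend, nz, *cols;
     Scalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
       ... use the nz column indices and values of this row ...
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
     }
*/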
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  /* rwork holds the m global row numbers of this processor's rows, which become
     column indices of B; it therefore needs m entries, not n */
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}
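/*
   Usage sketch (illustrative only, not part of the library source): scaling a
   matrix and checking its Frobenius norm with the routines above.

     int    ierr;
     Scalar alpha = 2.0;
     double nrm;

     ierr = MatScale(&alpha,A); CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm); CHKERRQ(ierr);
*/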
static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       /* MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
       /* MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
       /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}
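/*
   Usage sketch (illustrative only, not part of the library source): creating a
   parallel dense matrix with PETSc-managed storage, letting PETSc choose the
   local row distribution for a global size of M x N.

     Mat A;
     int ierr, M = 100, N = 100;

     ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
     ... set values, assemble, and use the matrix ...
     ierr = MatDestroy(A); CHKERRQ(ierr);
*/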
#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIDense"
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts for the other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
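/*
   Data-layout note (illustrative only, not part of the library source): the
   binary file stores each processor's rows in row-major order, while the
   local array obtained from MatGetArray() is column-major (Fortran order).
   For a local block with m = 2 rows and N = 3 columns the file holds

       v00 v01 v02 v10 v11 v12

   and the loops above place them as

       array = { v00, v10, v01, v11, v02, v12 }

   i.e. entry (i,j) goes to array[i + j*m].
*/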
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"Not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"Something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts for the other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"Something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
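/*
   Usage sketch (illustrative only, not part of the library source): loading a
   parallel dense matrix from a PETSc binary file.  The file name "matrix.dat"
   is just a placeholder.

     Viewer viewer;
     Mat    A;
     int    ierr;

     ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A); CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/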