#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.50 1996/10/01 03:34:29 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each entry separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}
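/*
   Note on the layout of v[] expected by MatSetValues() for this format
   (an illustrative sketch; the indices and numbers below are made up).
   By default the matrix is row oriented, so v[] holds m rows of n entries
   each; after MatSetOption(mat,MAT_COLUMN_ORIENTED) the same array is read
   down the columns instead.

      int    rows[2] = {0,1}, cols[2] = {4,5};
      Scalar v[4]    = {1.0,2.0,3.0,4.0};
      ierr = MatSetValues(mat,2,rows,2,cols,v,INSERT_VALUES); CHKERRQ(ierr);

   Row oriented this inserts the 2x2 block [1 2; 3 4]; column oriented the
   identical call inserts [1 3; 2 4].
*/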
static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner  = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,"MatAssemblyBegin_MPIDense:Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
          (global row index, global column index, value); the indices are
          stored as Scalars (doubles) to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
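/*
   Wire format used by the assembly routines (an illustration only, not a
   public interface): each stashed entry travels as three consecutive
   Scalars,

        svalues[3*k]   = (Scalar) global row index
        svalues[3*k+1] = (Scalar) global column index
        svalues[3*k+2] = numerical value

   MatAssemblyEnd_MPIDense() below recovers the indices with a cast such as

        row = (int) PetscReal(values[3*k]) - rstart;

   which is why a receive buffer of 3*nmax Scalars per message suffices.
*/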
extern int MatSetUpMultiply_MPIDense(Mat);

static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

static int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
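/*
   Typical calling sequence that exercises the two assembly routines above
   (a minimal sketch only; the loop bounds and the value 1.0 are placeholders):

      int    rstart,rend,i,j;
      Scalar v = 1.0;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        for ( j=0; j<N; j++ ) {
          ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES); CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

   Values set for rows owned by another processor are stashed in
   MatSetValues_MPIDense() and shipped by the assembly routines above.
*/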
/*
   The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   l->A directly and not go through the MatZeroRows() routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
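/*
   Example use of MatZeroRows() on this format (a sketch; the row list is
   arbitrary and is given in global numbering):

      IS     is;
      int    rows[2] = {0,1};
      Scalar one = 1.0;
      ierr = ISCreateGeneral(MPI_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
      ierr = MatZeroRows(mat,is,&one); CHKERRQ(ierr);
      ierr = ISDestroy(is); CHKERRQ(ierr);

   Here the zeroed rows keep 1.0 on the diagonal; passing PETSC_NULL instead
   of &one leaves them completely zero, subject to the caveat in the comment
   above MatZeroRows_MPIDense().
*/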
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}
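/*
   The four routines above are the parallel matrix-vector kernels: each
   process applies its locally owned block of rows to a full copy of the
   input vector gathered into lvec, and the transpose variants push local
   contributions back with a reverse scatter.  They are normally reached
   through the generic interface, e.g. (a sketch; the vectors x and y are
   assumed to be created elsewhere with layouts compatible with the matrix):

      ierr = MatMult(mat,x,y); CHKERRQ(ierr);        y = A x
      ierr = MatMultTrans(mat,y,x); CHKERRQ(ierr);   x = A' y
*/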
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int     rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}
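/*
   Viewing example (a sketch; VIEWER_STDOUT_WORLD is assumed to be the
   shared standard-output ASCII viewer):

      ierr = MatView(mat,VIEWER_STDOUT_WORLD); CHKERRQ(ierr);

   The ASCII info formats handled above are selected by setting the
   viewer's format (see the viewer package); binary output goes through
   MatView_MPIDense_Binary() and is currently restricted to one processor.
*/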
static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_ROW_ORIENTED) {
    MatSetOption(a->A,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED)
    { a->roworiented = 0; MatSetOption(a->A,op);}
  else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}
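/*
   MatGetRow()/MatRestoreRow() usage for this format (a sketch; only locally
   owned rows may be requested, as enforced above):

      int    rstart,rend,row,nz,*cols,k;
      Scalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ... use the nz pairs cols[k], vals[k] ...
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
      }
*/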
static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for (i=0; i<m; i++) rwork[i] = rstart + i;  /* global numbers of the local rows */
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free data struct from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}
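/*
   MatScale() example for this format (a sketch): scale every entry of the
   parallel matrix by alpha,

      Scalar alpha = 2.0;
      ierr = MatScale(&alpha,mat); CHKERRQ(ierr);

   Since each process owns a contiguous block of rows, the operation is
   purely local and needs no communication.
*/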
static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       /* MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
       /* MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
       /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
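/*
   Example of typical use (a sketch; the global size 100 is a placeholder
   and error handling is abbreviated):

      Mat A;
      ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                               100,100,PETSC_NULL,&A); CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
      ierr = MatDestroy(A); CHKERRQ(ierr);
*/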
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data     = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy  = MatDestroy_MPIDense;
  mat->view     = MatView_MPIDense;
  mat->factor   = 0;

  a->factor     = 0;
  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}

static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}
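/*
   The routine above is normally reached through MatConvert() with the
   MATSAME type, which duplicates the parallel matrix (a sketch):

      Mat B;
      ierr = MatConvert(A,MATSAME,&B); CHKERRQ(ierr);
*/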
#include "sys.h"

int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into the matrix by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
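/*
   Loading example (a sketch; the binary viewer is assumed to have been
   opened elsewhere on a file written with MatView()):

      Mat    A;
      Viewer viewer;
      ... open "viewer" as a binary file viewer ...
      ierr = MatLoad(viewer,MATMPIDENSE,&A); CHKERRQ(ierr);

   If the file holds the matrix in the dense binary layout, the routine
   above is used; otherwise MatLoad_MPIDense() below reads the standard
   sparse binary format and inserts the values row by row.
*/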
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}