#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.46 1996/08/15 12:47:17 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N) SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

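/*
   Layout note (illustrative sketch only, not used by the library code in
   this file): MatGetArray() on an MPIDense matrix returns the local
   m-by-N block stored by columns (Fortran order), so local entry (i,j)
   lives at array[i + j*m]; the MatGetDiagonal() and MatLoad() routines
   below rely on exactly this layout.  A minimal usage sketch, where B is
   assumed to be any assembled MATMPIDENSE matrix:

       Scalar *array;
       int    i, j, m, N, ierr;
       ierr = MatGetLocalSize(B,&m,&N); CHKERRQ(ierr);
       ierr = MatGetArray(B,&array); CHKERRQ(ierr);
       for ( j=0; j<N; j++ ) {
         for ( i=0; i<m; i++ ) {
           array[i + j*m] *= 2.0;       scale local row i, global column j in place
         }
       }
       ierr = MatRestoreArray(B,&array); CHKERRQ(ierr);
*/
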
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
          (global row index, global column index, value); we store the
          indices as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIDense(Mat);

static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs = mdn->nrecvs, count = nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == ASCII_FORMAT_INFO_DETAILED) {
    int nz, nzalloc, mem, rank;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",
            rank,mdn->m,nz,nzalloc,mem);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == ASCII_FORMAT_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,int *nzalloc,int *mem)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr, isend[3], irecv[3];

  ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  if (flag == MAT_LOCAL) {
    if (nz)      *nz      = isend[0];
    if (nzalloc) *nzalloc = isend[1];
    if (mem)     *mem     = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  }
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_ROW_ORIENTED) {
    MatSetOption(a->A,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  }
  else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");
  }
  else {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");
  }
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}

static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free data struct from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
/*     MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
/*     MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,0,
/*     0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;

  a->factor     = 0;
  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}

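/*
   A minimal usage sketch for MatCreateMPIDense() (illustration only; the
   matrix name C, the dimensions M and N, and the example values are
   assumptions made for this sketch and are not part of the library code).
   Each process sets its locally owned rows here, but MatSetValues() also
   accepts rows owned by other processes; those entries are stashed and
   communicated during assembly.

       Mat    C;
       int    ierr, i, j, rstart, rend, M = 10, N = 10;
       Scalar v;

       ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&C); CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(C,&rstart,&rend); CHKERRQ(ierr);
       for ( i=rstart; i<rend; i++ ) {
         for ( j=0; j<N; j++ ) {
           v = 1.0/(i+j+1);
           ierr = MatSetValues(C,1,&i,1,&j,&v,INSERT_VALUES); CHKERRQ(ierr);
         }
       }
       ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ... use the matrix, e.g. with MatMult(), then ...
       ierr = MatDestroy(C); CHKERRQ(ierr);
*/
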
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i, size, rank, m, rstart, rend, ierr, nz, j;
  Scalar     *array, *vals, *vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}

int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals, *svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4], rank, size, *rowlengths = 0, M, N, m, *rowners, maxnz, *cols;
  int        *ourlens, *sndcounts = 0, *procsnz = 0, *offlens, jj, *mycols, *smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}