#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.57 1996/11/30 21:47:49 curfman Exp curfman $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each entry separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N) SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}
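/*
   Usage sketch (illustrative only, not compiled here): inserting a 2-by-2
   block of values.  With the default row-oriented ordering v is read row by
   row; after MatSetOption(A,MAT_COLUMN_ORIENTED) the same v would be read
   column by column, matching the v+i+j*m indexing above.  Values destined
   for rows owned by another processor are stashed locally and communicated
   during assembly.

     Mat    A;                             (assumed already created as MPIDense)
     int    ierr, rows[2], cols[2];
     Scalar v[4];

     rows[0] = 0; rows[1] = 1;
     cols[0] = 0; cols[1] = 1;
     v[0] = 1.0; v[1] = 2.0; v[2] = 3.0; v[3] = 4.0;
     ierr = MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES); CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/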
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,"MatAssemblyBegin_MPIDense:Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); the indices are
          stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive.  Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIDense(Mat);
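/*
   The communication pattern used above, distilled (a sketch; buf, nmax,
   nreceives, tag, and comm are hypothetical stand-ins): one maximally-sized
   receive is posted per expected message, completions are harvested with
   MPI_Waitany, and the true message length is recovered from the status via
   MPI_Get_count rather than being transmitted separately.

     MPI_Request *recv_waits;
     MPI_Status  status;
     double      *buf;
     int         i, imdex, n;

     for ( i=0; i<nreceives; i++ ) {
       MPI_Irecv(buf+nmax*i,nmax,MPI_DOUBLE,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
     }
     for ( i=0; i<nreceives; i++ ) {
       MPI_Waitany(nreceives,recv_waits,&imdex,&status);
       MPI_Get_count(&status,MPI_DOUBLE,&n);   (n = actual length of message imdex)
     }
*/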
static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs = mdn->nrecvs, count = nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

static int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
/*
   The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG.  The only way to fix it seems to be to access
   mdn->A directly and not through the MatZeroRows() routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
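/*
   Usage sketch (illustrative only, not compiled here): zeroing two global
   rows and placing 1.0 on their diagonals.  Each processor may name any
   global rows; the routine above routes the indices to their owners.

     Mat    A;                             (assumed already assembled)
     IS     is;
     int    ierr, rows[2];
     Scalar one = 1.0;

     rows[0] = 0; rows[1] = 5;
     ierr = ISCreateGeneral(MPI_COMM_WORLD,2,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);
*/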
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}
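/*
   Usage sketch (illustrative only, not compiled here; m, M, N denote the
   matrix dimensions): y = A*x with conforming parallel vectors.  The input
   vector is first scattered into the local work vector lvec, which holds
   all N entries since each processor stores full rows, and the sequential
   kernel is then applied to the local block.

     Mat A;
     Vec x, y;
     int ierr;

     ierr = VecCreateMPI(MPI_COMM_WORLD,PETSC_DECIDE,N,&x); CHKERRQ(ierr);
     ierr = VecCreateMPI(MPI_COMM_WORLD,m,M,&y); CHKERRQ(ierr);
     ierr = MatMult(A,x,y); CHKERRQ(ierr);
*/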
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int     rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat          A;
      int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}
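/*
   Usage sketch (illustrative only, not compiled here): querying global
   storage totals after assembly; MAT_GLOBAL_MAX would instead report the
   per-processor maximum of each field.

     Mat     A;
     MatInfo info;
     int     ierr;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info); CHKERRQ(ierr);
     PetscPrintf(MPI_COMM_WORLD,"nonzeros %g memory %g\n",info.nz_used,info.memory);
*/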
/*
extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec);
*/

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  }
  else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  }
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_ROWS_UNSORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  }
  else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");
  }
  else {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");
  }
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}
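/*
   Usage sketch (illustrative only, not compiled here): the standard
   owner-computes loop.  MatGetRow()/MatRestoreRow() may only be called for
   rows in [rstart,rend), as enforced by MatGetRow_MPIDense() above.

     Mat    A;
     int    ierr, row, rstart, rend, nz, *cols;
     Scalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
       ... work with the nz entries of this row ...
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
     }
*/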
static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) { /* max column sum */
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row sum */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for the two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose; free the data structures
       from a and replace them with B's */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       /* MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
       /* MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
       /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};
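/*
   Usage sketch (illustrative only, not compiled here): scaling a matrix and
   measuring it.  Note the 2.0-era calling sequence with the scalar passed
   by address, as in MatScale_MPIDense() above; NORM_2 is the one norm type
   MatNorm_MPIDense() rejects in parallel.

     Mat    A;
     int    ierr;
     double nrm;
     Scalar alpha = 2.0;

     ierr = MatScale(&alpha,A); CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm); CHKERRQ(ierr);
*/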
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor     = 0;
  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}
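/*
   Usage sketch (illustrative only, not compiled here): creating a 100 x 100
   parallel dense matrix, letting PETSc choose the row distribution and
   allocate the storage, then setting the locally owned diagonal entries.

     Mat    A;
     int    ierr, i, rstart, rend;
     Scalar one = 1.0;

     ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                              PETSC_NULL,&A); CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
     for ( i=rstart; i<rend; i++ ) {
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES); CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/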
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i, size, rank, m, rstart, rend, ierr, nz, j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts owned by other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into matrix row by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
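/*
   The indexing used above, spelled out: the local dense block is stored in
   column-major (Fortran) order with leading dimension m, so global entry
   (rstart+i, j) lives at array[i + j*m].  A sketch (A, m, and N are
   hypothetical stand-ins) of traversing the whole local block:

     Mat    A;
     Scalar *array;
     int    ierr, i, j;

     ierr = MatGetArray(A,&array); CHKERRQ(ierr);
     for ( j=0; j<N; j++ ) {
       for ( i=0; i<m; i++ ) {
         ... array[i + j*m] is local row i, global column j ...
       }
     }
*/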
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:Not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /* handle the case where the matrix is stored on disk as a dense matrix */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat); /* fd is used on processor 0 only */
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:Something is wrong with the file");
  }
  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts owned by other processors and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:Something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
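/*
   Usage sketch (illustrative only, not compiled here; the file name is
   hypothetical, and the binary-viewer calling sequence is assumed from
   the 2.0-era API): loading an MPIDense matrix from a PETSc binary file.

     Viewer viewer;
     Mat    A;
     int    ierr;

     ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A); CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/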