#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.20 1995/12/29 23:19:09 curfman Exp curfman $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "mpidense.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
                                 int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each entry separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
          CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  if (!mdn->assembled) SETERRQ(1,"MatGetValues_MPIDense:Not for unassembled matrix");
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

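/*
   MatAssemblyBegin_MPIDense - begins communicating stashed entries to their
   owning processes.  Each stashed (row, column, value) triple is packed as
   three Scalars (the indices are stored as Scalars to simplify the message
   passing); receives are posted with the maximum message length since the
   individual lengths are not known in advance.
*/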
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
    "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row,global column,value); the indices are stored as
          Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive.  Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]   = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIDense(Mat);

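/*
   MatAssemblyEnd_MPIDense - completes the assembly: waits on the posted
   receives, unpacks each (row, column, value) triple into the local dense
   block, then waits on the sends and assembles the underlying sequential
   matrix.
*/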
static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs = mdn->nrecvs, count = nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        ierr = MatSetValues(mdn->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  mdn->assembled = 1;
  return 0;
}

static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG.  The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

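/*
   The parallel multiply routines below all follow the same pattern: the
   row-distributed input vector is first gathered into the local work
   vector lvec with the Mvctx scatter context, and the actual product is
   then computed by the underlying sequential dense matrix.
*/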
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (!mdn->assembled)
    SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (!mdn->assembled)
    SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

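/*
   For the transpose products the scatter runs in the opposite direction:
   each process forms its local contribution A_loc^T x in lvec, and the
   contributions are then summed into the distributed result with a
   reverse-mode scatter using ADD_VALUES.
*/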
static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIDense:must assemble matrix");
  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIDense:must assemble matrix");
  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

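/*
   The local block is stored by columns (Fortran style), m rows by N
   columns, so the global diagonal entry (rstart+i, rstart+i) lives at
   aloc->v[(rstart+i)*m + i].
*/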
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, i, n, m = a->m, radd;
  Scalar       *x;

  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIDense:must assemble matrix");
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  radd = a->rstart*m;  /* offset of the column holding the first local diagonal entry */
  for ( i=0; i<m; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) PetscFree(mdn->factor);
  PetscFree(mdn);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  PetscObject  vobj = (PetscObject) viewer;
  FILE         *fd;

  ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format); CHKERRQ(ierr);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz, nzalloc, mem, rank;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); CHKERRQ(ierr);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",
              rank,mdn->m,nz,nzalloc,mem);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }
  if (vobj->type == ASCII_FILE_VIEWER) {
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor */
      Mat          A;
      int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  PetscObject  vobj = (PetscObject) viewer;
  int          ierr;

  if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == VIEWER_COOKIE &&
      (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

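/*
   MatGetInfo_MPIDense - reports storage information.  With MAT_LOCAL the
   counts for this process's block are returned; with MAT_GLOBAL_MAX or
   MAT_GLOBAL_SUM the local counts are reduced across the communicator.
*/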
static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
                               int *nzalloc,int *mem)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr, isend[3], irecv[3];

  ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
/* extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
  else if (op == COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  }
  else if (op == NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");
  }
  else {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");
  }
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}

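/*
   MatNorm_MPIDense - the Frobenius norm is formed from the sum of the
   locally owned squares, the 1-norm (max column sum) from per-column
   absolute sums reduced with MPI_SUM, and the infinity norm (max row sum)
   from the local row norms reduced with MPI_MAX.  The 2-norm is not
   supported.
*/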
static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix");
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (!matout && M != N)
    SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place");
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
  CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;  /* global row numbers of the local block */
  for ( j=0; j<n; j++ ) {
    /* column j of the local block becomes row j of the transpose */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       MatSolve_MPIDense,0,
       0,0,
       MatLUFactor_MPIDense,0,
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense,
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0,
       0,0,0,0,0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
       if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  newmat - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
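/*
   Example usage (a minimal sketch; assumes a main program where
   PetscInitialize() has been called and errors are checked with CHKERRA()):

       Mat    A;
       int    ierr, i, rstart, rend;
       Scalar val = 1.0;

       ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                                10,10,PETSC_NULL,&A); CHKERRA(ierr);
       ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRA(ierr);
       for ( i=rstart; i<rend; i++ ) {   / * set the locally owned diagonal * /
         ierr = MatSetValues(A,1,&i,1,&i,&val,INSERT_VALUES); CHKERRA(ierr);
       }
       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRA(ierr);
       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRA(ierr);
*/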
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i;

  /* Note: for now, when data is specified, we assume the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = N;
  a->M = M;
  a->m = m;
  a->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->assembled   = 0;
  a->roworiented = 1;

  *newmat = mat;
  return 0;
}

static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIDense:Must assemble matrix");
  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = A->factor;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sysio.h"

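/*
   MatLoad_MPIDense - loads a matrix from a PETSc binary file.  The file
   begins with a four-entry integer header (the cookie, M, N, and a fourth
   entry unused here), followed by the row lengths, the column indices,
   and finally the numerical values.  Process 0 reads the file and ships
   each process its portion.
*/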
int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j, rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIDENSE) {
    ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  }
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with the file");

    /* insert into matrix */
    jj = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}