#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.58 1996/12/01 17:55:13 curfman Exp balay $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatSetValues_MPIDense"
static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each value separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetValues_MPIDense"
static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N) SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetArray_MPIDense"
static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatRestoreArray_MPIDense"
static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}
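/*
   Usage sketch (not compiled): how the value layout passed to MatSetValues()
   interacts with the orientation logic above.  The matrix A and the index
   arrays are hypothetical; A is assumed to have been created with
   MatCreateMPIDense() defined later in this file.

      Scalar v[4]    = {1.0, 2.0, 3.0, 4.0};
      int    rows[2] = {0, 1}, cols[2] = {0, 1};

      ierr = MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES); CHKERRQ(ierr);

   With the default row-oriented storage the block is read row by row
   (v+i*n above): a_00=1, a_01=2, a_10=3, a_11=4.  After
   MatSetOption(A,MAT_COLUMN_ORIENTED) the same array is read column by
   column (v+i+j*m above): a_00=1, a_10=2, a_01=3, a_11=4.
*/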
#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyBegin_MPIDense"
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(1,"MatAssemblyBegin_MPIDense:Internal PETSc error");
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
          (global index,value); we store the global index as a double
          to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive.  Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyEnd_MPIDense" 198 static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 199 { 200 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 201 MPI_Status *send_status,recv_status; 202 int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 203 Scalar *values,val; 204 InsertMode addv = mdn->insertmode; 205 206 /* wait on receives */ 207 while (count) { 208 MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 209 /* unpack receives into our local space */ 210 values = mdn->rvalues + 3*imdex*mdn->rmax; 211 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 212 n = n/3; 213 for ( i=0; i<n; i++ ) { 214 row = (int) PetscReal(values[3*i]) - mdn->rstart; 215 col = (int) PetscReal(values[3*i+1]); 216 val = values[3*i+2]; 217 if (col >= 0 && col < mdn->N) { 218 MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 219 } 220 else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 221 } 222 count--; 223 } 224 PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 225 226 /* wait on sends */ 227 if (mdn->nsends) { 228 send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 229 MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 230 PetscFree(send_status); 231 } 232 PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 233 234 mdn->insertmode = NOT_SET_VALUES; 235 ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 236 ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 237 238 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 239 ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 240 } 241 return 0; 242 } 243 244 #undef __FUNCTION__ 245 #define __FUNCTION__ "MatZeroEntries_MPIDense" 246 static int MatZeroEntries_MPIDense(Mat A) 247 { 248 Mat_MPIDense *l = (Mat_MPIDense *) A->data; 249 return MatZeroEntries(l->A); 250 } 251 252 #undef __FUNCTION__ 253 #define __FUNCTION__ "MatGetBlockSize_MPIDense" 254 static int MatGetBlockSize_MPIDense(Mat A,int *bs) 255 { 256 *bs = 1; 257 return 0; 258 } 259 260 /* the code does not do the diagonal entries correctly unless the 261 matrix is square and the column and row owerships are identical. 262 This is a BUG. The only way to fix it seems to be to access 263 mdn->A and mdn->B directly and not through the MatZeroRows() 264 routine. 
265 */ 266 #undef __FUNCTION__ 267 #define __FUNCTION__ "MatZeroRows_MPIDense" 268 static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 269 { 270 Mat_MPIDense *l = (Mat_MPIDense *) A->data; 271 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 272 int *procs,*nprocs,j,found,idx,nsends,*work; 273 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 274 int *rvalues,tag = A->tag,count,base,slen,n,*source; 275 int *lens,imdex,*lrows,*values; 276 MPI_Comm comm = A->comm; 277 MPI_Request *send_waits,*recv_waits; 278 MPI_Status recv_status,*send_status; 279 IS istmp; 280 281 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 283 284 /* first count number of contributors to each processor */ 285 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 286 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 287 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 288 for ( i=0; i<N; i++ ) { 289 idx = rows[i]; 290 found = 0; 291 for ( j=0; j<size; j++ ) { 292 if (idx >= owners[j] && idx < owners[j+1]) { 293 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 294 } 295 } 296 if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 297 } 298 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 299 300 /* inform other processors of number of messages and max length*/ 301 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 302 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 303 nrecvs = work[rank]; 304 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 305 nmax = work[rank]; 306 PetscFree(work); 307 308 /* post receives: */ 309 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 310 CHKPTRQ(rvalues); 311 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 312 CHKPTRQ(recv_waits); 313 for ( i=0; i<nrecvs; i++ ) { 314 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 315 } 316 317 /* do sends: 318 1) starts[i] gives the starting index in svalues for stuff going to 319 the ith processor 320 */ 321 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 322 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 323 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 324 starts[0] = 0; 325 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 326 for ( i=0; i<N; i++ ) { 327 svalues[starts[owner[i]]++] = rows[i]; 328 } 329 ISRestoreIndices(is,&rows); 330 331 starts[0] = 0; 332 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 333 count = 0; 334 for ( i=0; i<size; i++ ) { 335 if (procs[i]) { 336 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 337 } 338 } 339 PetscFree(starts); 340 341 base = owners[rank]; 342 343 /* wait on receives */ 344 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 345 source = lens + nrecvs; 346 count = nrecvs; slen = 0; 347 while (count) { 348 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 349 /* unpack receives into our local space */ 350 MPI_Get_count(&recv_status,MPI_INT,&n); 351 source[imdex] = recv_status.MPI_SOURCE; 352 lens[imdex] = n; 353 slen += n; 354 count--; 355 } 356 PetscFree(recv_waits); 357 358 /* move the data into the send scatter */ 359 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 360 count = 0; 361 for ( i=0; i<nrecvs; i++ ) { 362 values = rvalues + i*nmax; 363 
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMult_MPIDense"
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultAdd_MPIDense"
static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultTrans_MPIDense"
static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatMultTransAdd_MPIDense"
static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetDiagonal_MPIDense"
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}
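/*
   Usage sketch (not compiled): applying the parallel matrix with the
   routines above.  The vector layouts are hypothetical but follow the
   convention of this file: y is distributed like the rows (local size m),
   while x is a parallel vector of global length N that is gathered into
   mdn->lvec by the Mvctx scatter inside MatMult_MPIDense().

      Vec x, y;
      ierr = VecCreateMPI(comm,PETSC_DECIDE,N,&x); CHKERRQ(ierr);
      ierr = VecCreateMPI(comm,m,M,&y); CHKERRQ(ierr);
      ierr = MatMult(A,x,y); CHKERRQ(ierr);          y = A*x
      ierr = MatMultTrans(A,y,x); CHKERRQ(ierr);     x = A'*y
      ierr = VecDestroy(x); CHKERRQ(ierr);
      ierr = VecDestroy(y); CHKERRQ(ierr);
*/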
#undef __FUNCTION__
#define __FUNCTION__ "MatDestroy_MPIDense"
static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec) VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp) PetscFree(mdn->factor->temp);
    if (mdn->factor->tag) PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int     rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == VIEWER_FORMAT_ASCII_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ...
         This isn't the most efficient means, but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIDense"
static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetInfo_MPIDense"
static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}
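/*
   Usage sketch (not compiled): querying the statistics collected above.
   MAT_LOCAL returns this processor's numbers; MAT_GLOBAL_SUM and
   MAT_GLOBAL_MAX reduce them over the communicator.  The names A and comm
   are hypothetical.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info); CHKERRQ(ierr);
      PetscPrintf(comm,"nonzeros used %g, memory %g bytes\n",
                  info.nz_used,info.memory);
*/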
/*
extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec);
*/

#undef __FUNCTION__
#define __FUNCTION__ "MatSetOption_MPIDense"
static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetSize_MPIDense"
static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetLocalSize_MPIDense"
static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetOwnershipRange_MPIDense"
static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetRow_MPIDense"
static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows")
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

#undef __FUNCTION__
#define __FUNCTION__ "MatRestoreRow_MPIDense"
static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v) PetscFree(*v);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatNorm_MPIDense"
static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 788 } 789 } 790 return 0; 791 } 792 793 #undef __FUNCTION__ 794 #define __FUNCTION__ "MatTranspose_MPIDense" 795 static int MatTranspose_MPIDense(Mat A,Mat *matout) 796 { 797 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 798 Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 799 Mat B; 800 int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 801 int j, i, ierr; 802 Scalar *v; 803 804 if (matout == PETSC_NULL && M != N) { 805 SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 806 } 807 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 808 809 m = Aloc->m; n = Aloc->n; v = Aloc->v; 810 rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 811 for ( j=0; j<n; j++ ) { 812 for (i=0; i<m; i++) rwork[i] = rstart + i; 813 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 814 v += m; 815 } 816 PetscFree(rwork); 817 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 818 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 819 if (matout != PETSC_NULL) { 820 *matout = B; 821 } else { 822 /* This isn't really an in-place transpose, but free data struct from a */ 823 PetscFree(a->rowners); 824 ierr = MatDestroy(a->A); CHKERRQ(ierr); 825 if (a->lvec) VecDestroy(a->lvec); 826 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 827 PetscFree(a); 828 PetscMemcpy(A,B,sizeof(struct _Mat)); 829 PetscHeaderDestroy(B); 830 } 831 return 0; 832 } 833 834 #include "pinclude/plapack.h" 835 #undef __FUNCTION__ 836 #define __FUNCTION__ "MatScale_MPIDense" 837 static int MatScale_MPIDense(Scalar *alpha,Mat inA) 838 { 839 Mat_MPIDense *A = (Mat_MPIDense *) inA->data; 840 Mat_SeqDense *a = (Mat_SeqDense *) A->A->data; 841 int one = 1, nz; 842 843 nz = a->m*a->n; 844 BLscal_( &nz, alpha, a->v, &one ); 845 PLogFlops(nz); 846 return 0; 847 } 848 849 static int MatConvertSameType_MPIDense(Mat,Mat *,int); 850 851 /* -------------------------------------------------------------------*/ 852 static struct _MatOps MatOps = {MatSetValues_MPIDense, 853 MatGetRow_MPIDense,MatRestoreRow_MPIDense, 854 MatMult_MPIDense,MatMultAdd_MPIDense, 855 MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 856 /* MatSolve_MPIDense,0, */ 857 0,0, 858 0,0, 859 0,0, 860 /* MatLUFactor_MPIDense,0, */ 861 0,MatTranspose_MPIDense, 862 MatGetInfo_MPIDense,0, 863 MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 864 MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 865 0, 866 MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 867 0,0, 868 /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */ 869 0,0, 870 MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 871 MatGetOwnershipRange_MPIDense, 872 0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense, 873 0,MatConvertSameType_MPIDense, 874 0,0,0,0,0, 875 0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense, 876 0,0,0,MatGetBlockSize_MPIDense}; 877 878 #undef __FUNCTION__ 879 #define __FUNCTION__ "MatCreateMPIDense" 880 /*@C 881 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 882 883 Input Parameters: 884 . comm - MPI communicator 885 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 886 . n - number of local columns (or PETSC_DECIDE to have calculated 887 if N is given) 888 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 889 . N - number of global columns (or PETSC_DECIDE to have calculated 890 if n is given) 891 . data - optional location of matrix data. 
   Set data=PETSC_NULL for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor     = 0;
  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}
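/*
   Usage sketch (not compiled): creating and filling a parallel dense matrix
   with the routine above.  The communicator, sizes, and the element formula
   are hypothetical; only one of the local/global dimensions needs to be
   specified, the other may be PETSC_DECIDE.

      Mat    A;
      int    i, j, rstart, rend, ierr;
      Scalar value;

      ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        for ( j=0; j<N; j++ ) {
          value = 1.0/(i+j+1);
          ierr  = MatSetValues(A,1,&i,1,&j,&value,INSERT_VALUES); CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/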
#undef __FUNCTION__
#define __FUNCTION__ "MatConvertSameType_MPIDense"
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i,size,rank,m,rstart,rend,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:Not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:Something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:Something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
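/*
   Usage sketch (not compiled): loading a matrix with the routine above from
   a binary viewer.  How the viewer is opened is left to the binary-viewer
   constructor provided elsewhere in the library; only the MatLoad() call
   follows the signature in this file, and the remaining names are
   hypothetical.

      Viewer viewer;      binary viewer already opened for reading
      Mat    A;
      double nrm;

      ierr = MatLoad(viewer,MATMPIDENSE,&A); CHKERRQ(ierr);
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm); CHKERRQ(ierr);
      ierr = MatDestroy(A); CHKERRQ(ierr);
*/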