#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.100 1998/12/17 22:10:09 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    } else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      } else { /* must stash each separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
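/*
   Illustrative sketch (not part of the library source): MatSetValues_MPIDense is
   reached through the generic MatSetValues() interface. Entries whose rows are
   owned locally go straight into the local sequential dense matrix; entries for
   rows owned by other processes are stashed and shipped during assembly, e.g.

      Scalar one = 1.0;
      int    row = 0, col = 0;                      illustrative global indices; may be off-process
      ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/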
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner  = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(PETSC_ERR_PLIB,0,"Internal PETSc error");
  ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); the indices are
          stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
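  /*
     Packing used below (and unpacked in MatAssemblyEnd_MPIDense): each stashed
     entry k occupies three consecutive Scalars in svalues/rvalues,

        [3*k]    global row index    (cast to Scalar)
        [3*k+1]  global column index (cast to Scalar)
        [3*k+2]  numerical value
  */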
  rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues    = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  PetscFunctionReturn(0);
}

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    ierr   = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n      = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid column");
      }
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
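/*
   Communication pattern in MatZeroRows_MPIDense below: each process determines
   the owner of every requested global row, sends the off-process indices to
   their owners, receives the indices it owns from the other processes (lrows),
   and then zeros those rows by calling MatZeroRows() on its local sequential
   dense block.
*/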
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
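/*
   Parallel matrix-vector product: the input vector xx is gathered into the
   local work vector lvec through the VecScatter context Mvctx (each process
   stores full rows of length N, so it needs all of x), and the product is
   then computed with the sequential dense kernel on the local rows.
*/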
#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... this isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
    }
    ierr = MatDestroy(A); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec);
*/

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  PetscFunctionBegin;
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but free data struct from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}
#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       0,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.
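   Example usage (an illustrative sketch; error checking and the surrounding
   declarations are omitted):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);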
.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) {ierr = MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);}
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /*
     The computation of n below is wrong; n should represent the number of
     local rows in the right-hand side (column) vector.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
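/*
   Layout of the binary file read by MatLoad_MPIDense below (as the code below
   expects it): a four-int header (MAT_COOKIE, M, N, nz). If nz equals
   MATRIX_BINARY_FORMAT_DENSE the matrix values follow in dense, row-by-row
   order and are handled by MatLoad_MPIDense_DenseInFile above; otherwise the
   file continues with the M row lengths, then the column indices, then the
   numerical values, which process 0 reads and ships to the owning processes.
*/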
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}