#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.81 1998/03/16 18:55:07 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel (MPI) dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    } else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      } else { /* must stash each entry separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
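/*
   MatSetValues_MPIDense() above accepts global indices; entries destined for
   rows owned by other processes are cached in the stash and forwarded to their
   owners by MatAssemblyBegin_MPIDense()/MatAssemblyEnd_MPIDense() below.  An
   illustrative call sequence from user code (variable names here are
   hypothetical, not part of this file):

       ierr = MatSetValues(mat,1,&grow,1,&gcol,&value,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/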
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  if (nreceives > size) SETERRQ(PETSC_ERR_PLIB,0,"Internal PETSc error");
  ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of ordered triples
          (global row index, global column index, value); the indices are
          stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive.  Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n);
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  PetscFunctionReturn(0);
}
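/*
   Packing example (illustrative values only): a stashed entry destined for
   another process with global row 7, global column 3, and value 2.5 travels as
   the three consecutive Scalars {7.0, 3.0, 2.5}.  MatAssemblyEnd_MPIDense()
   below converts the first two back to integers and calls MatSetValues() on the
   owner's local block.
*/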
extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        ierr = MatSetValues(mdn->A,1,&row,1,&col,&val,addv);CHKERRQ(ierr);
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid column");
      }
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
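/*
   Note on the multiply kernels above: with the row-wise decomposition each
   process owns an m x N block, so mdn->lvec (set up in
   MatSetUpMultiply_MPIDense()) holds a complete copy of the input vector.
   MatMult scatters x into lvec and applies the local dense multiply to produce
   the owned piece of y; MatMultTrans applies the local transpose multiply into
   lvec and then accumulates the partial results into y with an ADD_VALUES
   reverse scatter.
*/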
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#include "pinclude/pviewer.h"

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    int     rank;
    MatInfo info;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... this isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  PetscFunctionBegin;
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  PetscFunctionReturn(0);
}
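/*
   Norms for the row-distributed layout, as computed below:
     NORM_FROBENIUS - each process sums the squares of its local entries, the
                      partial sums are added across processes, and the square
                      root is taken;
     NORM_1         - (max column sum) each process accumulates |a_ij| over its
                      local rows into per-column totals, the totals are summed
                      across processes, and the maximum is taken;
     NORM_INFINITY  - (max row sum) the local row-sum maxima are reduced with
                      MPI_MAX.
*/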
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
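/*
   MatTranspose_MPIDense below forms the transpose with MatSetValues(): local
   column j of this process's m x N block (m consecutive entries of Aloc->v,
   whose global row indices are rstart..rstart+m-1) becomes global row j of the
   result, which is then assembled.  The "in-place" variant actually builds a
   new matrix and copies its header over A, preserving only A's function tables.
*/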
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int)); CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose, but free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/plapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       /* MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
       /* MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,
       /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense,
       0,0,0,MatGetBlockSize_MPIDense};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) {ierr = MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);}
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
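/*
   Illustrative usage of MatCreateMPIDense() (variable names and sizes here are
   hypothetical; error handling abbreviated):

       Mat    mat;
       int    ierr, row = 0, col = 0;
       Scalar value = 1.0;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,
                                PETSC_NULL,&mat); CHKERRQ(ierr);
       ierr = MatSetValues(mat,1,&row,1,&col,&value,INSERT_VALUES); CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatDestroy(mat); CHKERRQ(ierr);
*/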
#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIDense"
static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

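/*
   File layout assumed by MatLoad_MPIDense_DenseInFile() above: after the binary
   header read in MatLoad_MPIDense(), the numerical values appear row by row
   (row-major), with the rows grouped by owning process.  Because the underlying
   SeqDense storage is column-major (Fortran order), each block is transposed on
   insertion via array[i + j*m].
*/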
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }
  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}