#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.118 1999/07/26 01:16:26 curfman Exp curfman $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, nrows, ncols, nlrows, nlcols;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK ... temporarily just support retaining all rows on each proc.
  */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  if (nlrows != nrows) SETERRQ(PETSC_ERR_ARG_SIZ,0,
    "Currently supports only submatrices containing ALL local rows of the original matrix (same row partitioning)");

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    int n_cols,n_rows;
    ierr = MatGetSize(*B,&n_rows,&n_cols);CHKERRQ(ierr);
    if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size");
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j]];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we free the data structures of a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       0,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong: n should represent the number of local
     entries in the right-hand (column) vector that the matrix is applied to.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed during assembly */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
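
/*
   Illustrative usage sketch (application code, not part of this file): creating a
   parallel dense matrix with MatCreateMPIDense(), letting PETSc choose the local
   sizes and allocate the storage.  The global dimensions below are made up, and
   in real code every ierr would be checked with CHKERRQ(ierr).

      Mat    C;
      int    ierr, row = 0, col = 0;
      Scalar one = 1.0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&C);
      ierr = MatSetValues(C,1,&row,1,&col,&one,INSERT_VALUES);
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
      ... use C, e.g. MatMult(C,x,y) ...
      ierr = MatDestroy(C);
*/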

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
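
/*
   Illustrative usage sketch (application code, not part of this file): loading a
   parallel dense matrix from a PETSc binary file.  It assumes a binary viewer has
   already been opened for reading (for example with ViewerFileOpenBinary()), and
   every ierr would normally be checked with CHKERRQ(ierr).

      Mat    A;
      Viewer viewer;                  (opened elsewhere for reading)
      int    ierr;

      ierr = MatLoad(viewer,MATMPIDENSE,&A);
      ... use A ...
      ierr = MatDestroy(A);
*/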