#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.125 1999/09/20 19:35:08 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  /* the diagonal block starts at local column rstart; the local storage is column major */
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
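/*
   Note on the ordering of the values passed to MatSetValues()/MatGetValues()
   above: with the default row-oriented option the m*n values are supplied row
   by row, so the value destined for global entry (idxm[i],idxn[j]) is read
   from v[i*n+j]; with MAT_COLUMN_ORIENTED it is read from v[i+j*m].  A minimal
   calling sketch (illustrative only, not part of this file):

       int    rows[2] = {0,1}, cols[3] = {0,1,2};
       Scalar vals[6];     (row oriented: vals[i*3+j] holds entry (rows[i],cols[j]))
       ierr = MatSetValues(mat,2,rows,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
*/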
#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported!  We should really check
     each index set to confirm that it is OK; currently only submatrices with
     the same row partitioning as the original matrix are supported. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for ( j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
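/*
   The pointer returned by MatGetArray() above addresses the local rows of the
   matrix (all N columns) in column-major (Fortran) order, so local entry (i,j)
   is array[i+j*m] where m is the local row count.  A minimal access sketch
   (illustrative only):

       Scalar *array;
       int    m,n;
       ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
       ierr = MatGetArray(mat,&array);CHKERRQ(ierr);
       ... use array[i+j*m] for local entry (i,j), 0 <= i < m, 0 <= j < n ...
       ierr = MatRestoreArray(mat,&array);CHKERRQ(ierr);
*/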
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
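/*
   The assembly routines above implement the usual PETSc calling sequence: any
   process may set any entry with MatSetValues(); off-processor entries are
   cached in the stash by MatSetValues_MPIDense() and delivered to their owners
   during MatAssemblyBegin()/MatAssemblyEnd().  A minimal sketch of the
   caller's side (illustrative only):

       ierr = MatSetValues(mat,1,&i,1,&j,&value,ADD_VALUES);CHKERRQ(ierr);   (any i,j, even off-process)
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/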
/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
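/*
   A small worked example of the send-side bookkeeping used above (illustrative
   only): with 3 processes owning rows {0-2}, {3-4} and {5-6} respectively, and
   a local list rows = {5,0,6}, the owners are processes {2,0,2}, so
   nprocs = {1,0,2}, procs = {1,0,1}, nsends = 2 and starts = {0,1,1}.  Packing
   then yields svalues = {0,5,6}: row 0 goes in one message to process 0, and
   rows 5 and 6 go together in one message to process 2.
*/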
#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
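/*
   The multiply routines above work as follows (each process stores all N
   columns of its m local rows): for y = A*x the full input vector x is first
   gathered into the local work vector lvec with the Mvctx scatter, and then
   y_local = A_local * lvec is a purely local dense product.  For the transpose
   product y = A'*x each process forms A_local' * x_local (length N) and the
   contributions are summed into the distributed result with a reverse scatter
   using ADD_VALUES.
*/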
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m; /* offset of column rstart in the column-major local array */
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int ierr;

  PetscFunctionBegin;
  if (PetscTypeCompare(viewer,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(viewer,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
extern int MatSolve_MPIDense(Mat,Vec,Vec);
extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec);
*/
#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend, ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
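/*
   The ownership-range and row-access routines above combine in the usual PETSc
   idiom for visiting the locally owned rows; MatView_MPIDense_ASCII() uses the
   same pattern.  A minimal sketch (illustrative only):

       int    rstart,rend,row,nz,*cols;
       Scalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for ( row=rstart; row<rend; row++ ) {
         ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ... examine the nz entries of the row ...
         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
*/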
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
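/*
   For reference, the norms computed above are (a_ij the global entries):

      Frobenius:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )    (local sums of squares, then MPI_SUM)
      1-norm:     ||A||_1   = max_j sum_i |a_ij|         (per-column sums reduced with MPI_SUM, then a max)
      inf-norm:   ||A||_inf = max_i sum_j |a_ij|         (local row sums, since all columns are local, then MPI_MAX)
*/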
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  /* rwork holds the m global row indices of this process; it needs m (not n)
     entries, since local column j of A becomes row j of B with m values */
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
  for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
  for ( j=0; j<n; j++ ) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we free the data structures
       from a and take over the ones built for B */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr  = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each row stores all columns */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N; /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed during MatSetValues() */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIDense",
                                    (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
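/*
   A minimal usage sketch for MatCreateMPIDense() (illustrative only; error
   handling abbreviated).  With PETSC_DECIDE for the local sizes each process
   receives roughly M/size rows, recorded in the rowners table built above
   (e.g. 7 rows on 3 processes gives rowners = {0,3,5,7}):

       Mat A;
       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
       ... MatSetValues() calls ...
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(A);CHKERRQ(ierr);
*/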
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (the file is row major, the local array
       is column major, which is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (the file is row major, the local array
       is column major, which is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
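/*
   MatLoad_MPIDense() below reads the standard PETSc binary matrix file: an
   integer header {MAT_COOKIE, M, N, nz}, followed either by the usual sparse
   data (row lengths, then column indices, then values) or, when
   nz == MATRIX_BINARY_FORMAT_DENSE, by the dense values stored row by row, in
   which case the file is handed to MatLoad_MPIDense_DenseInFile() above.
   Process 0 does all of the reading and ships each process its block of rows.
*/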
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j, rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* handle the case where the matrix is stored on disk as a dense matrix */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in everyone else's values and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}