#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.128 1999/10/13 20:37:17 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
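/*
   Note on storage layout (added commentary, not from the original source):
   the local piece of an MPIDense matrix is a SeqDense matrix of size m x N
   stored by columns (Fortran order), so logical entry (i,j) of the local
   block lives at v[i + j*m].  That is why the column-oriented branch of
   MatSetValues_MPIDense reads the input block at v+i+j*m, and why
   MatGetDiagonal_MPIDense further below addresses the diagonal at
   rstart*m + i*m + i.  A minimal sketch of the addressing, assuming a local
   array v holding an m-by-N block:

     Scalar MatMPIDenseEntry_Sketch(Scalar *v,int m,int i,int j)
     {
       return v[i + j*m];   /* column-major: column j starts at offset j*m */
     }
*/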
#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for ( j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
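/*
   Added commentary (not from the original source): entries destined for rows
   owned by another process are collected in mat->stash by MatSetValues_MPIDense
   above.  MatAssemblyBegin_MPIDense starts shipping the stash to the owning
   processes; MatAssemblyEnd_MPIDense below receives those messages and replays
   them through MatSetValues_MPIDense, one contiguous run of same-row entries
   per call.  The usual calling sequence is therefore

     ierr = MatSetValues(mat,...);CHKERRQ(ierr);               (any number of times)
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/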
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
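/*
   Added commentary (not from the original source): MatZeroRows_MPIDense below
   is handed an arbitrary list of global rows on each process.  It first works
   out which process owns each row (using the rowners[] table), ships every row
   index to its owner with MPI_Isend/MPI_Irecv, converts the received indices to
   local numbering, and finally zeros the local rows through MatZeroRows() on
   the sequential block.
*/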
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
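  /* Added commentary (not from the original source): at this point slen is the
     total number of row indices received from all processes; lrows[] below
     collects them shifted by base = owners[rank], i.e. in the local row
     numbering of the sequential block l->A. */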
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
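  /* Added commentary (not from the original source): the local block is m x N,
     stored by columns, and this process owns global rows rstart..rstart+m-1.
     The diagonal entry of global row rstart+i sits in local row i of global
     column rstart+i, i.e. at offset (rstart+i)*m + i = radd + i*m + i below. */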
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    ierr = ViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
                 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);

  if (isascii) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn  = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend, ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;
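  /* Added commentary (not from the original source): this scales the matrix as
     diag(ll) * A * diag(rr).  The left scaling ll matches the row distribution
     and is applied directly to the local rows; since every process stores all N
     columns of its local block, the distributed right scaling rr is first
     gathered into the sequential work vector mdn->lvec and then applied one
     column at a time. */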
  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork); /* holds the m global row indices owned here */
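  /* Added commentary (not from the original source): B is the N x M transpose.
     Local column j of A (m consecutive values starting at v) becomes part of
     global row j of B, at global columns rstart .. rstart+m-1, which is what
     rwork[] holds in the loop below. */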
  for ( j=0; j<n; j++ ) {
    for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but free data struct from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
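/*
   Example usage (an illustrative sketch added here, not part of the original
   source): create a square parallel dense matrix, letting PETSc pick the local
   sizes, set its diagonal from the owning process, and assemble it.

     Mat    C;
     Scalar one = 1.0;
     int    ierr,i,rstart,rend;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&C);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
     for ( i=rstart; i<rend; i++ ) {
       ierr = MatSetValues(C,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/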
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each row stores all columns */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N;  /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr          = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr          = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIDense",
                                    (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;
  int        i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
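
/*
   Example usage (an illustrative sketch added here, not part of the original
   source): MatLoad_MPIDense is normally reached through the generic MatLoad()
   interface with a binary viewer, e.g.

     Viewer viewer;
     Mat    A;
     int    ierr;

     ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/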