/*$Id: mpidense.c,v 1.130 1999/10/24 14:02:12 bsmith Exp bsmith $*/

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for ( j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
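
/*
   Usage sketch (illustrative only, not part of the library): zeroing a set of
   global rows of an assembled MPI dense matrix with MatZeroRows().  Because of
   the limitation described above, placing a value on the diagonal is only
   reliable when the matrix is square and the row and column ownerships match.
   The function name below is hypothetical; the calls (MatGetOwnershipRange,
   ISCreateGeneral, MatZeroRows, ISDestroy) are the ones used elsewhere in this
   file.
*/
#if 0
static int MatZeroRowsExample_MPIDense(Mat A,Scalar diag)
{
  IS  is;
  int ierr,rstart,rend,rows[1];

  /* each process contributes the global row numbers it wants zeroed */
  ierr    = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  rows[0] = rstart; /* zero the first locally owned row */
  ierr    = ISCreateGeneral(PETSC_COMM_SELF,1,rows,&is);CHKERRQ(ierr);
  ierr    = MatZeroRows(A,is,&diag);CHKERRQ(ierr); /* diag is placed on the zeroed diagonal entries */
  ierr    = ISDestroy(is);CHKERRQ(ierr);
  return 0;
}
#endif
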
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  ViewerType   vtype;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
                                           (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;

    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend, ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but we free the data structures from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i;
  PetscTruth   flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each row stores all columns */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N;   /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
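
/*
   Usage sketch (illustrative only, not part of the library): creating an M x N
   parallel dense matrix with MatCreateMPIDense(), letting PETSc split the rows,
   inserting one locally owned row per process, and assembling.  The function
   name and the values inserted are hypothetical; the calls themselves
   (MatCreateMPIDense, MatGetOwnershipRange, MatSetValues, MatAssemblyBegin/End,
   MatDestroy) follow the interfaces used in this file.  Rows set with global
   indices owned by another process would be stashed and delivered at assembly.
*/
#if 0
static int MatCreateMPIDenseExample(MPI_Comm comm,int M,int N)
{
  Mat    A;
  int    ierr,row,j,rstart,rend,*cols;
  Scalar *vals;

  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  cols = (int *)    PetscMalloc(N*sizeof(int));CHKPTRQ(cols);
  vals = (Scalar *) PetscMalloc(N*sizeof(Scalar));CHKPTRQ(vals);
  for ( j=0; j<N; j++ ) { cols[j] = j; vals[j] = 1.0; }

  /* fill the first row this process owns (column-major storage is handled internally) */
  row  = rstart;
  ierr = MatSetValues(A,1,&row,N,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  return 0;
}
#endif
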
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' rows and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}