#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.124 1999/09/02 14:53:19 bsmith Exp bsmith $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if ( !A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}
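
/*
   Usage sketch (illustrative only, not part of the library): how the stash path
   above is exercised.  Any process may call MatSetValues() with rows it does not
   own; MatSetValues_MPIDense() caches those entries in mat->stash and they are
   communicated during MatAssemblyBegin()/MatAssemblyEnd().  The sizes and the
   row/column indices below are arbitrary, and error checking is omitted.

      Mat    A;
      Scalar one = 1.0;
      int    row = 0, col = 0, ierr;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);
      ierr = MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);   (stashed if row 0 is off-process)
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ierr = MatDestroy(A);
*/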

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but free data struct from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr  = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note:  For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each row stores all columns */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N;  /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIDense",
                                    (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
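
/*
   Usage sketch (illustrative only, not part of the library): creating a
   10 x 10 parallel dense matrix with PETSc-controlled storage and filling
   the locally owned rows.  The matrix size and values are arbitrary and
   error checking is omitted for brevity.

      Mat    A;
      int    i,j,rstart,rend,ierr;
      Scalar v;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);
      for ( i=rstart; i<rend; i++ ) {
        for ( j=0; j<10; j++ ) {
          v    = 1.0/(i+j+1.0);
          ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
        }
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ierr = MatDestroy(A);
*/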

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int          *rowners, i,size,rank,m,ierr,nz,j;
  Scalar       *array,*vals,*vals_ptr;
  MPI_Status   status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
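
/*
   Usage sketch (illustrative only): loading a matrix through the MatLoad()
   entry point implemented above.  The binary Viewer is assumed to have been
   created elsewhere (with the binary viewer creation routine of this PETSc
   release) for a file previously written with MatView(); error checking is
   omitted for brevity.

      Mat A;
      int ierr;

      ierr = MatLoad(viewer,MATMPIDENSE,&A);    (viewer: an existing binary Viewer)
      ierr = MatDestroy(A);
*/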