/*$Id: mpidense.c,v 1.131 1999/11/05 14:45:15 bsmith Exp bsmith $*/

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
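/*
   Illustrative note (added commentary, not original to this file): for a logically
   m-by-n block of values the indexing above assumes that a row-oriented caller lays
   out v as v[i*n+j] and a column-oriented caller as v[i+j*m].  A hypothetical 2-by-3
   block

        10 11 12
        20 21 22

   would thus be passed as {10,11,12,20,21,22} in the default row-oriented mode and
   as {10,20,11,21,12,22} after MatSetOption(mat,MAT_COLUMN_ORIENTED).
*/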
#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for ( j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
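/*
   Usage sketch (hypothetical, for illustration only): the array returned by
   MatGetArray() on an MPIDense matrix is the local block of rows stored by columns
   (Fortran order), so the leading dimension is the number of local rows.

     Mat    A;                          (assumed already created and assembled)
     Scalar *array;
     int    ierr,m,n;

     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetArray(A,&array);CHKERRQ(ierr);
     ... array[i + j*m] is local row i, column j, for 0 <= i < m, 0 <= j < n ...
     ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/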
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
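/*
   Usage sketch (hypothetical, for illustration only): entries destined for rows owned
   by other processors are stashed by MatSetValues() and are only communicated during
   the MatAssemblyBegin()/MatAssemblyEnd() pair implemented above.  The sizes below
   are made up.

     Mat    A;
     int    ierr,i,M = 8,N = 8;
     Scalar one = 1.0;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     for ( i=0; i<M; i++ ) {              (any process may set any global entry)
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/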
#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
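/*
   Usage sketch (hypothetical, for illustration only): each processor may list any
   global rows; the routine above routes every index to its owning processor before
   zeroing.  The row numbers are made up, and the diagonal handling is subject to the
   caveat in the comment preceding MatZeroRows_MPIDense().

     Mat    A;                          (assumed already assembled)
     IS     is;
     int    ierr,rows[2] = {0,3};
     Scalar one = 1.0;

     ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/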
#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
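/*
   Usage sketch (hypothetical, for illustration only): y = A*x for an M-by-N MPIDense
   matrix living on PETSC_COMM_WORLD.  Each multiply first scatters the distributed
   input vector into the full-length work vector lvec and then applies the sequential
   dense kernel; the sketch assumes the default (PETSC_DECIDE) column layout so that x
   is compatible with the scatter built in MatSetUpMultiply_MPIDense().

     Mat A;                             (assumed already assembled)
     Vec x,y;
     int ierr,m,n,M,N;

     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,M,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(x);CHKERRQ(ierr);
     ierr = VecDestroy(y);CHKERRQ(ierr);
*/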
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr,len,i,n,m = a->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr,format,size = mdn->size,rank = mdn->rank;
  ViewerType   vtype;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;

    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M,N = mdn->N,m,row,i,nz,*cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
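/*
   Usage sketch (hypothetical, for illustration only): only locally owned rows may be
   accessed, so a typical traversal combines MatGetOwnershipRange() with
   MatGetRow()/MatRestoreRow().

     Mat    A;                          (assumed already assembled)
     int    ierr,row,rstart,rend,nz,*cols;
     Scalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... use the nz column indices and values of this row ...
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/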
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m = mat->m,n = mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr,i,j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp,*tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M,N = a->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork); /* holds one global row index per local row */
  for ( j=0; j<n; j++ ) {
    for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we simply free the data structures of a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};
#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr,i;
  PetscTruth   flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each processor stores all the columns of its local rows */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N;  /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache (stash) for off-processor entries */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
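/*
   Usage sketch (hypothetical, for illustration only): a 100-by-50 parallel dense
   matrix whose row distribution is chosen by PETSc and whose storage is allocated
   by PETSc.

     Mat A;
     int ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), MatMult(), ...
     ierr = MatDestroy(A);CHKERRQ(ierr);

   Passing a non-null data pointer instead of PETSC_NULL hands PETSc an m-by-N
   column-major (Fortran-order) block that the caller has already allocated.
*/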
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
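/*
   Usage sketch (hypothetical, for illustration only): loading an MPIDense matrix from
   a PETSc binary file.  The file name is made up, and ViewerBinaryOpen() with
   BINARY_RDONLY is assumed to be the binary-viewer open call of this PETSc release.

     Mat    A;
     Viewer viewer;
     int    ierr;

     ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/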