/*$Id: mpidense.c,v 1.138 2000/05/04 14:04:46 balay Exp balay $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = mdn->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
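
/*
   Note on the layout of v in MatSetValues_MPIDense() above: with row
   orientation, entry (i,j) of the inserted block is read from v[i*n+j];
   with column orientation it is read from v[i+j*m].  For example, a
   hypothetical 2x2 insertion (m = n = 2, the values a00..a11 made up
   for illustration) would pass

       v = {a00,a01,a10,a11}   when row oriented
       v = {a00,a10,a01,a11}   when column oriented
*/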

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported. We should really check each
     index set to confirm that it is OK; currently only submatrices with the same
     partitioning as the original matrix are supported. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
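
/*
   Summary of the communication done by MatZeroRows_MPIDense() below (a
   description of the existing code, not a new specification): each process
   scans the global row indices it was given, determines the owner of each
   from l->rowners, and routes every off-process index to its owner with
   nonblocking MPI sends/receives; the number and maximum size of incoming
   messages are established first with an MPI_Allreduce using
   PetscMaxSum_Op.  Once each process holds the list of its own rows to be
   zeroed, the actual zeroing is delegated to MatZeroRows() on the local
   sequential block l->A.
*/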
265 */ 266 #undef __FUNC__ 267 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIDense" 268 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 269 { 270 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 271 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 272 int *procs,*nprocs,j,found,idx,nsends,*work; 273 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 274 int *rvalues,tag = A->tag,count,base,slen,n,*source; 275 int *lens,imdex,*lrows,*values; 276 MPI_Comm comm = A->comm; 277 MPI_Request *send_waits,*recv_waits; 278 MPI_Status recv_status,*send_status; 279 IS istmp; 280 281 PetscFunctionBegin; 282 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 283 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 284 285 /* first count number of contributors to each processor */ 286 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 287 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 288 procs = nprocs + size; 289 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 290 for (i=0; i<N; i++) { 291 idx = rows[i]; 292 found = 0; 293 for (j=0; j<size; j++) { 294 if (idx >= owners[j] && idx < owners[j+1]) { 295 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 296 } 297 } 298 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 299 } 300 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 301 302 /* inform other processors of number of messages and max length*/ 303 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 304 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 305 nmax = work[rank]; 306 nrecvs = work[size+rank]; 307 ierr = PetscFree(work);CHKERRQ(ierr); 308 309 /* post receives: */ 310 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 311 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 312 for (i=0; i<nrecvs; i++) { 313 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 314 } 315 316 /* do sends: 317 1) starts[i] gives the starting index in svalues for stuff going to 318 the ith processor 319 */ 320 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 321 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 322 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 323 starts[0] = 0; 324 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 325 for (i=0; i<N; i++) { 326 svalues[starts[owner[i]]++] = rows[i]; 327 } 328 ISRestoreIndices(is,&rows); 329 330 starts[0] = 0; 331 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 332 count = 0; 333 for (i=0; i<size; i++) { 334 if (procs[i]) { 335 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 336 } 337 } 338 ierr = PetscFree(starts);CHKERRQ(ierr); 339 340 base = owners[rank]; 341 342 /* wait on receives */ 343 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 344 source = lens + nrecvs; 345 count = nrecvs; slen = 0; 346 while (count) { 347 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 348 /* unpack receives into our local space */ 349 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 350 source[imdex] = recv_status.MPI_SOURCE; 351 lens[imdex] = n; 352 slen += n; 353 count--; 354 } 355 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 356 357 /* move the data into the send scatter */ 358 lrows = 
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = a->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,format,size = mdn->size,rank = mdn->rank;
  ViewerType   vtype;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
                                           (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;

    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M,N = mdn->N,m,row,i,nz,*cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*)mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
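
/*
   MatDiagonalScale_MPIDense() below computes A = diag(ll)*A*diag(rr)
   (this summarizes the loops in the routine): the left scaling is applied
   directly to the locally owned rows, while the right scaling vector is
   first scattered into mdn->lvec (one entry per global column) and then
   applied column by column to the local column-major array.
*/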

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      tmp  = (PetscReal*)PetscMalloc(2*mdn->N*sizeof(PetscReal));CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      /* add up the absolute values in each local column */
      for (j=0; j<mat->n; j++) {
        for (i=0; i<mat->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      /* the 1-norm is the largest global column sum */
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<mdn->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = a->M,N = a->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"In-place transpose is supported only for square matrices");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose; we free the data structures
       of a and then copy everything from B into A */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec)  {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
    if (a->Mvctx) {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_(&nz,alpha,a->v,&one);
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr,i;
  PetscTruth   flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  a->nvec = n;

  /* each row stores all columns */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = N; /* NOTE: n == N */

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
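
/*
   Illustrative usage of MatCreateMPIDense() (a minimal sketch, not part of
   this file's API; the 10x10 global size and the diagonal values are made
   up for the example, and error checking is abbreviated):

     Mat    C;
     int    ierr,rstart,rend,i;
     Scalar one = 1.0;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&C);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(C,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/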

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx*)(factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int*)PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
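
/*
   Layout of the binary file as read by MatLoad_MPIDense() below (this only
   summarizes the reads performed by the routine, it is not an independent
   specification): a four-int header {MAT_COOKIE, M, N, nz}; when nz equals
   MATRIX_BINARY_FORMAT_DENSE the remainder of the file is the dense array
   of values and is handled by MatLoad_MPIDense_DenseInFile(); otherwise the
   header is followed by the M row lengths, then all column indices, then
   all numerical values, which process 0 reads and distributes.
*/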

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;
  int        i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}