/*$Id: mpidense.c,v 1.145 2000/10/24 20:25:30 bsmith Exp bsmith $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,rank,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
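/*
   Minimal usage sketch (illustrative, not part of the library source): values
   whose rows are owned by another process are stashed by MatSetValues above
   and routed during assembly, so any process may set any global entry as long
   as an assembly phase follows.  The sizes and values here are assumptions
   made up for the example.

       Mat    A;
       int    ierr,row = 0;                  possibly an off-process global row
       int    cols[2] = {0,1};
       Scalar vals[2] = {1.0,2.0};

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
       ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/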
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols,rank;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
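/*
   Sketch (assumed example): the array returned by MatGetArray on an MPI dense
   matrix is the local m x N block stored by columns, so entry (i,j) of the
   local block sits at array[i + j*m].  The matrix A and size queries below are
   assumed context for the illustration.

       Scalar *array;
       int    ierr,i,j,m,n,M,N;

       ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
       ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
       ierr = MatGetArray(A,&array);CHKERRQ(ierr);
       for (j=0; j<N; j++) {
         for (i=0; i<m; i++) {
           array[i + j*m] = 0.0;
         }
       }
       ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/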
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
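/*
   Illustrative sketch (assumed example, not from this file): MatZeroRows below
   takes an index set of *global* row numbers; each process may list any rows,
   and the routine routes them to their owners.  Passing a non-null diag places
   that value on the diagonal of each zeroed row.

       IS     is;
       int    ierr,rows[1] = {5};          a global row to zero
       Scalar one = 1.0;

       ierr = ISCreateGeneral(PETSC_COMM_SELF,1,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);
*/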
/* the code does not do the diagonal entries correctly unless the 
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access 
   mdn->A and mdn->B directly and not through the MatZeroRows() 
   routine. 
*/
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
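/*
   The message-count handshake above packs two reductions into one using the
   PETSc-private PetscMaxSum_Op: work[rank] ends up holding the longest message
   this process will receive, and work[size+rank] the number of senders.  A
   plain-MPI sketch of the same idea (assumed illustrative code, with
   lengths[] and flags[] standing in for nprocs/procs):

       int lengths[64],flags[64],maxlen[64],nfrom[64];    fixed bound for the sketch

       MPI_Allreduce(lengths,maxlen,size,MPI_INT,MPI_MAX,comm);
       MPI_Allreduce(flags,nfrom,size,MPI_INT,MPI_SUM,comm);

   after which maxlen[rank] bounds the receive buffer length and nfrom[rank]
   tells how many MPI_Irecv calls to post.
*/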
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
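/*
   Sketch of conforming vector layouts for the multiply routines above (assumed
   example): for y = A x with A of global size M x N, x must be distributed to
   match the column layout of A and y to match its row ownership; m and n here
   stand for the local row and column counts of A.

       Vec x,y;
       int ierr;

       ierr = VecCreateMPI(comm,n,N,&x);CHKERRQ(ierr);
       ierr = VecCreateMPI(comm,m,M,&y);CHKERRQ(ierr);
       ierr = MatMult(A,x,y);CHKERRQ(ierr);
       ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);
*/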
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,format,size = mdn->size,rank = mdn->rank;
  ViewerType   vtype;
  PetscTruth   isascii,isdraw;
  Viewer       sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                   (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;

    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat    A;
    int    M = mat->M,N = mat->N,m,row,i,nz,*cols;
    Scalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = ViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
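/*
   Viewing sketch (assumed example; ViewerSetFormat is used here with its
   PETSc 2.0-era calling sequence, which is an assumption): the info formats
   print per-process summaries, while the default ASCII format gathers the
   whole matrix onto process 0 as done above.

       ierr = ViewerSetFormat(VIEWER_STDOUT_WORLD,VIEWER_FORMAT_ASCII_INFO,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatView(A,VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/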
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
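/*
   Sketch (assumed example): querying aggregate storage statistics across the
   communicator via the MAT_GLOBAL_SUM reduction handled above.

       MatInfo info;
       int     ierr;

       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros %d, memory %d bytes\n",
                          (int)info.nz_used,(int)info.memory);CHKERRQ(ierr);
*/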
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = PETSC_TRUE;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}
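/*
   Sketch (assumed example): MatGetRow only serves locally owned rows, so the
   standard pattern is to loop over the ownership range reported above.

       int    rstart,rend,row,nz,*cols,ierr;
       Scalar *vals;

       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ... use the nz entries of cols[] and vals[] ...
         ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
*/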
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      tmp  = (PetscReal*)PetscMalloc(2*A->N*sizeof(PetscReal));CHKPTRQ(tmp);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  rwork = (int*)PetscMalloc(m*sizeof(int));CHKPTRQ(rwork); /* m entries are filled below */
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PLogFlops(nz);
  PetscFunctionReturn(0);
}
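/*
   Sketch (assumed example): note the PETSc 2.x calling sequence of MatScale,
   which takes a pointer to the scalar, matching MatScale_MPIDense above.

       Scalar    two = 2.0;
       PetscReal nrm;
       int       ierr;

       ierr = MatScale(&two,A);CHKERRQ(ierr);
       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/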
static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

#undef __FUNC__  
#define __FUNC__ /*<a name="MatSetUpPreallocation_MPIDense"></a>*/"MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIDense,
       0,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  mat->data = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
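/*
   Sketch (assumed example): MatCreate_MPIDense above is the constructor behind
   the MATMPIDENSE type name, so the generic creation path is the two-step
   create-then-set-type sequence, followed by preallocation.

       Mat A;
       int ierr;

       ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/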
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
.  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
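/*
   Sketch (assumed example): supplying user storage.  The array must hold the
   local m x N block in Fortran (column-major) order; m, M, N, comm and the
   buffer "mydata" are hypothetical names for this illustration.

       Scalar *mydata;
       Mat    A;
       int    ierr;

       mydata = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(mydata);
       ierr   = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,mydata,&A);CHKERRQ(ierr);
*/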
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
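/*
   Usage sketch (assumed example): with PETSC_DECIDE for the local sizes PETSc
   splits the rows and columns itself; note from the code above that on a
   single process this routine actually creates a MATSEQDENSE matrix.

       Mat A;
       int ierr;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);CHKERRQ(ierr);
*/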
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  mat->data = (void*)(a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  if (oldmat->factor) {
    a->factor = (FactorCtx*)(factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int*)PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot directly read into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(m*N*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot directly read into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
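/*
   Loading sketch (assumed example; ViewerBinaryOpen and BINARY_RDONLY are the
   assumed era names for the binary viewer constructor and mode):

       Viewer viewer;
       Mat    A;
       int    ierr;

       ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/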
EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
     (fd is only used on process 0, where it was set above)
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END