/*$Id: mpidense.c,v 1.150 2001/01/20 03:34:41 bsmith Exp balay $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);
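
/*
   Note on off-process entries: MatSetValues_MPIDense() below inserts values
   for locally owned rows directly into the underlying sequential dense
   matrix; values destined for rows owned by other processes are placed in
   the matrix stash (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and are
   communicated to their owners during MatAssemblyBegin()/MatAssemblyEnd().
*/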
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! We should really check each
     index set to confirm that it is OK. Currently only submatrices with the same
     partitioning as the original matrix are supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
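
/*
   MatAssemblyEnd_MPIDense() drains the stash: each incoming message is
   unpacked with MatStashScatterGetMesg_Private(), consecutive entries that
   share a row index are grouped, and each group is inserted with a single
   call to MatSetValues_MPIDense().  The local sequential matrix is then
   assembled, and on the first final assembly the scatter used by the
   matrix-vector products is created via MatSetUpMultiply_MPIDense().
*/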
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);
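
  /* After the PetscMaxSum_Op reduction, nmax is the length of the longest
     message this process will receive and nrecvs is the number of messages
     it will receive, so nrecvs receive slots of nmax integers each suffice. */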
  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* copy the received row indices into lrows, converting to local numbering */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
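
/*
   The matrix-vector products below share one pattern: the input vector is
   gathered into the sequential work vector lvec with the scatter context
   Mvctx (each process stores all N columns of its rows, so it needs the
   entire input vector), and the product is then formed with the sequential
   dense kernels.  The transpose products go the other way: a local product
   into lvec followed by a reverse scatter-add into the result vector.
*/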
#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                                                (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat    A;
    int    M = mat->M,N = mat->N,m,row,i,nz,*cols;
    Scalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);
    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
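
/*
   Most options are simply forwarded to the sequential dense matrix that
   stores the local rows; ordering and symmetry hints that have no effect on
   dense storage are ignored.  MAT_IGNORE_OFF_PROC_ENTRIES only sets the
   donotstash flag, so subsequent MatSetValues() calls drop rows owned by
   other processes instead of stashing them.
*/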
#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = PETSC_TRUE;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
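
/*
   Diagonal scaling: the left vector ll conforms to the row layout, so each
   local row i is scaled by ll[i] in place.  The right vector rr conforms to
   the column layout, so it is first gathered into the work vector lvec
   (every process stores all N columns of its rows) and each local column is
   then scaled by the corresponding entry.
*/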
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc((m+1)*sizeof(int),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
#undef __FUNC__
#define __FUNC__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatTranspose_MPIDense,
                                       MatGetInfo_MPIDense,
                                       0,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       0,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       MatZeroRows_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatSetUpPreallocation_MPIDense,
                                       0,
                                       MatGetOwnershipRange_MPIDense,
                                       0,
                                       0,
                                       MatGetArray_MPIDense,
                                       MatRestoreArray_MPIDense,
                                       MatDuplicate_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatGetSubMatrices_MPIDense,
                                       0,
                                       MatGetValues_MPIDense,
                                       0,
                                       0,
                                       MatScale_MPIDense,
                                       0,
                                       0,
                                       0,
                                       MatGetBlockSize_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatGetSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       MatGetMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr          = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr          = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build the cache (stash) used for off-processor entries */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
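
   A minimal usage sketch (M and N are global dimensions chosen by the caller;
   error checking is abbreviated):

.vb
      Mat    A;
      Scalar one = 1.0;
      int    row = 0,col = 0,ierr;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ierr = MatDestroy(A);
.ve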

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read the file
       directly into array, which is stored by columns) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

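    /* Reusing vals for every outgoing block below is safe: the partitioning
       above gives rank 0 at least as many rows as any other process, so m*N
       bounds nz for every destination. */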
    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot receive
       directly into array, which is stored by columns) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;
  int        i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }
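
  /* Process 0 computes the number of nonzeros owned by each process (procsnz),
     reads the column indices for its own rows, and then reads and forwards
     every other process's indices; the other processes receive theirs below.
     The same pattern is repeated afterwards for the numerical values. */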
  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
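
/*
   A rough usage sketch for loading a parallel dense matrix from a PETSc
   binary file (the binary viewer is assumed to have been opened elsewhere,
   e.g. with PetscViewerBinaryOpen(); error checking is abbreviated):

      Mat         A;
      PetscViewer viewer;              (an already opened binary viewer)
      ierr = MatLoad(viewer,MATMPIDENSE,&A);
      ...
      ierr = MatDestroy(A);
*/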