1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/ 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" 8 #include "src/vec/vecimpl.h" 9 10 EXTERN_C_BEGIN 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 14 { 15 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 16 int m = A->m,rstart = mdn->rstart,ierr; 17 PetscScalar *array; 18 MPI_Comm comm; 19 20 PetscFunctionBegin; 21 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 22 23 /* The reuse aspect is not implemented efficiently */ 24 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 25 26 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 27 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 28 ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr); 29 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 30 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 32 33 *iscopy = PETSC_TRUE; 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN int MatSetUpMultiply_MPIDense(Mat); 39 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatSetValues_MPIDense" 42 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv) 43 { 44 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 45 int ierr,i,j,rstart = A->rstart,rend = A->rend,row; 46 PetscTruth roworiented = A->roworiented; 47 48 PetscFunctionBegin; 49 for (i=0; i<m; i++) { 50 if (idxm[i] < 0) continue; 51 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 52 if (idxm[i] >= rstart && idxm[i] < rend) { 53 row = idxm[i] - rstart; 54 if (roworiented) { 55 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 56 } else { 57 for (j=0; j<n; j++) { 58 if (idxn[j] < 0) continue; 59 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 60 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 61 } 62 } 63 } else { 64 if (!A->donotstash) { 65 if (roworiented) { 66 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 67 } else { 68 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 69 } 70 } 71 } 72 } 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetValues_MPIDense" 78 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 79 { 80 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 81 int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row; 82 83 PetscFunctionBegin; 84 for (i=0; i<m; i++) { 85 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 86 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 87 if (idxm[i] >= rstart && idxm[i] < rend) { 88 row = idxm[i] - rstart; 89 for (j=0; j<n; j++) { 90 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 91 if (idxn[j] >= mat->N) { 92 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 93 } 94 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 95 } 96 } else { 97 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 98 } 99 } 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatGetArray_MPIDense" 105 int MatGetArray_MPIDense(Mat A,PetscScalar **array) 106 { 107 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 108 int ierr; 109 110 PetscFunctionBegin; 111 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 118 { 119 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 120 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 121 int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 122 PetscScalar *av,*bv,*v = lmat->v; 123 Mat newmat; 124 125 PetscFunctionBegin; 126 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 127 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 128 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 129 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 130 131 /* No parallel redistribution currently supported! Should really check each index set 132 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 133 original matrix! */ 134 135 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 137 138 /* Check submatrix call */ 139 if (scall == MAT_REUSE_MATRIX) { 140 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 141 /* Really need to test rows and column sizes! */ 142 newmat = *B; 143 } else { 144 /* Create and fill new matrix */ 145 ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr); 146 } 147 148 /* Now extract the data pointers and do the copy, column at a time */ 149 newmatd = (Mat_MPIDense*)newmat->data; 150 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 151 152 for (i=0; i<ncols; i++) { 153 av = v + nlrows*icol[i]; 154 for (j=0; j<nrows; j++) { 155 *bv++ = av[irow[j] - rstart]; 156 } 157 } 158 159 /* Assemble the matrices so that the correct flags are set */ 160 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 161 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 162 163 /* Free work space */ 164 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 165 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 166 *B = newmat; 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatRestoreArray_MPIDense" 172 int MatRestoreArray_MPIDense(Mat A,PetscScalar **array) 173 { 174 PetscFunctionBegin; 175 PetscFunctionReturn(0); 176 } 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 181 { 182 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 183 MPI_Comm comm = mat->comm; 184 int ierr,nstash,reallocs; 185 InsertMode addv; 186 187 PetscFunctionBegin; 188 /* make sure all processors are either in INSERTMODE or ADDMODE */ 189 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 190 if (addv == (ADD_VALUES|INSERT_VALUES)) { 191 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 192 } 193 mat->insertmode = addv; /* in case this processor had no cache */ 194 195 ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 196 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 197 PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 198 PetscFunctionReturn(0); 199 } 200 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 204 { 205 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 206 int i,n,ierr,*row,*col,flg,j,rstart,ncols; 207 PetscScalar *val; 208 InsertMode addv=mat->insertmode; 209 210 PetscFunctionBegin; 211 /* wait on receives */ 212 while (1) { 213 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 214 if (!flg) break; 215 216 for (i=0; i<n;) { 217 /* Now identify the consecutive vals belonging to the same row */ 218 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 219 if (j < n) ncols = j-i; 220 else ncols = n-i; 221 /* Now assemble all these values with a single function call */ 222 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 223 i = j; 224 } 225 } 226 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 227 228 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 229 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 230 231 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 232 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatZeroEntries_MPIDense" 239 int MatZeroEntries_MPIDense(Mat A) 240 { 241 int ierr; 242 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 243 244 PetscFunctionBegin; 245 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatGetBlockSize_MPIDense" 251 int MatGetBlockSize_MPIDense(Mat A,int *bs) 252 { 253 PetscFunctionBegin; 254 *bs = 1; 255 PetscFunctionReturn(0); 256 } 257 258 /* the code does not do the diagonal entries correctly unless the 259 matrix is square and the column and row owerships are identical. 260 This is a BUG. The only way to fix it seems to be to access 261 mdn->A and mdn->B directly and not through the MatZeroRows() 262 routine. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatZeroRows_MPIDense" 266 int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag) 267 { 268 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 269 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 270 int *procs,*nprocs,j,idx,nsends,*work; 271 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 272 int *rvalues,tag = A->tag,count,base,slen,n,*source; 273 int *lens,imdex,*lrows,*values; 274 MPI_Comm comm = A->comm; 275 MPI_Request *send_waits,*recv_waits; 276 MPI_Status recv_status,*send_status; 277 IS istmp; 278 PetscTruth found; 279 280 PetscFunctionBegin; 281 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 283 284 /* first count number of contributors to each processor */ 285 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 286 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 287 procs = nprocs + size; 288 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 289 for (i=0; i<N; i++) { 290 idx = rows[i]; 291 found = PETSC_FALSE; 292 for (j=0; j<size; j++) { 293 if (idx >= owners[j] && idx < owners[j+1]) { 294 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 295 } 296 } 297 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 298 } 299 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 300 301 /* inform other processors of number of messages and max length*/ 302 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 303 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 304 nmax = work[rank]; 305 nrecvs = work[size+rank]; 306 ierr = PetscFree(work);CHKERRQ(ierr); 307 308 /* post receives: */ 309 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 310 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 311 for (i=0; i<nrecvs; i++) { 312 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 313 } 314 315 /* do sends: 316 1) starts[i] gives the starting index in svalues for stuff going to 317 the ith processor 318 */ 319 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 320 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 321 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 322 starts[0] = 0; 323 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 324 for (i=0; i<N; i++) { 325 svalues[starts[owner[i]]++] = rows[i]; 326 } 327 ISRestoreIndices(is,&rows); 328 329 starts[0] = 0; 330 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 331 count = 0; 332 for (i=0; i<size; i++) { 333 if (procs[i]) { 334 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 335 } 336 } 337 ierr = PetscFree(starts);CHKERRQ(ierr); 338 339 base = owners[rank]; 340 341 /* wait on receives */ 342 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 343 source = lens + nrecvs; 344 count = nrecvs; slen = 0; 345 while (count) { 346 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 347 /* unpack receives into our local space */ 348 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 349 source[imdex] = recv_status.MPI_SOURCE; 350 lens[imdex] = n; 351 slen += n; 352 count--; 353 } 354 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 355 356 /* move the data into the send scatter */ 357 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 358 count = 0; 359 for (i=0; i<nrecvs; i++) { 360 values = rvalues + i*nmax; 361 for (j=0; j<lens[i]; j++) { 362 lrows[count++] = values[j] - base; 363 } 364 } 365 ierr = PetscFree(rvalues);CHKERRQ(ierr); 366 ierr = PetscFree(lens);CHKERRQ(ierr); 367 ierr = PetscFree(owner);CHKERRQ(ierr); 368 ierr = PetscFree(nprocs);CHKERRQ(ierr); 369 370 /* actually zap the local rows */ 371 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 372 PetscLogObjectParent(A,istmp); 373 ierr = PetscFree(lrows);CHKERRQ(ierr); 374 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 375 ierr = ISDestroy(istmp);CHKERRQ(ierr); 376 377 /* wait on sends */ 378 if (nsends) { 379 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 380 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 381 ierr = PetscFree(send_status);CHKERRQ(ierr); 382 } 383 ierr = PetscFree(send_waits);CHKERRQ(ierr); 384 ierr = PetscFree(svalues);CHKERRQ(ierr); 385 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatMult_MPIDense" 391 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 392 { 393 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 394 int ierr; 395 396 PetscFunctionBegin; 397 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 398 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 399 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatMultAdd_MPIDense" 405 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 406 { 407 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 408 int ierr; 409 410 PetscFunctionBegin; 411 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 412 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 413 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatMultTranspose_MPIDense" 419 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 420 { 421 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 422 int ierr; 423 PetscScalar zero = 0.0; 424 425 PetscFunctionBegin; 426 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 427 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 428 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 429 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 435 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 436 { 437 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 438 int ierr; 439 440 PetscFunctionBegin; 441 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 442 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 443 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 444 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatGetDiagonal_MPIDense" 450 int MatGetDiagonal_MPIDense(Mat A,Vec v) 451 { 452 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 453 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 454 int ierr,len,i,n,m = A->m,radd; 455 PetscScalar *x,zero = 0.0; 456 457 PetscFunctionBegin; 458 ierr = VecSet(&zero,v);CHKERRQ(ierr); 459 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 460 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 461 if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 462 len = PetscMin(a->A->m,a->A->n); 463 radd = a->rstart*m; 464 for (i=0; i<len; i++) { 465 x[i] = aloc->v[radd + i*m + i]; 466 } 467 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatDestroy_MPIDense" 473 int MatDestroy_MPIDense(Mat mat) 474 { 475 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 476 int ierr; 477 478 PetscFunctionBegin; 479 480 #if defined(PETSC_USE_LOG) 481 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 482 #endif 483 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 484 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 485 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 486 if (mdn->lvec) VecDestroy(mdn->lvec); 487 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 488 if (mdn->factor) { 489 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 490 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 491 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 492 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 493 } 494 ierr = PetscFree(mdn);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatView_MPIDense_Binary" 500 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 501 { 502 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 503 int ierr; 504 505 PetscFunctionBegin; 506 if (mdn->size == 1) { 507 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 508 } 509 else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported"); 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 515 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 516 { 517 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 518 int ierr,size = mdn->size,rank = mdn->rank; 519 PetscViewerType vtype; 520 PetscTruth isascii,isdraw; 521 PetscViewer sviewer; 522 PetscViewerFormat format; 523 524 PetscFunctionBegin; 525 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 526 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 527 if (isascii) { 528 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 529 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 530 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 531 MatInfo info; 532 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 533 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m, 534 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 535 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 536 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 537 PetscFunctionReturn(0); 538 } else if (format == PETSC_VIEWER_ASCII_INFO) { 539 PetscFunctionReturn(0); 540 } 541 } else if (isdraw) { 542 PetscDraw draw; 543 PetscTruth isnull; 544 545 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 546 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 547 if (isnull) PetscFunctionReturn(0); 548 } 549 550 if (size == 1) { 551 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 552 } else { 553 /* assemble the entire matrix onto first processor. */ 554 Mat A; 555 int M = mat->M,N = mat->N,m,row,i,nz,*cols; 556 PetscScalar *vals; 557 558 if (!rank) { 559 ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 560 } else { 561 ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 562 } 563 PetscLogObjectParent(mat,A); 564 565 /* Copy the matrix ... This isn't the most efficient means, 566 but it's quick for now */ 567 row = mdn->rstart; m = mdn->A->m; 568 for (i=0; i<m; i++) { 569 ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 570 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 571 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 572 row++; 573 } 574 575 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 576 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 577 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 578 if (!rank) { 579 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 580 } 581 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 582 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 583 ierr = MatDestroy(A);CHKERRQ(ierr); 584 } 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatView_MPIDense" 590 int MatView_MPIDense(Mat mat,PetscViewer viewer) 591 { 592 int ierr; 593 PetscTruth isascii,isbinary,isdraw,issocket; 594 595 PetscFunctionBegin; 596 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 601 602 if (isascii || issocket || isdraw) { 603 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 604 } else if (isbinary) { 605 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 606 } else { 607 SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 608 } 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatGetInfo_MPIDense" 614 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 615 { 616 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 617 Mat mdn = mat->A; 618 int ierr; 619 PetscReal isend[5],irecv[5]; 620 621 PetscFunctionBegin; 622 info->rows_global = (double)A->M; 623 info->columns_global = (double)A->N; 624 info->rows_local = (double)A->m; 625 info->columns_local = (double)A->N; 626 info->block_size = 1.0; 627 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 628 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 629 isend[3] = info->memory; isend[4] = info->mallocs; 630 if (flag == MAT_LOCAL) { 631 info->nz_used = isend[0]; 632 info->nz_allocated = isend[1]; 633 info->nz_unneeded = isend[2]; 634 info->memory = isend[3]; 635 info->mallocs = isend[4]; 636 } else if (flag == MAT_GLOBAL_MAX) { 637 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 638 info->nz_used = irecv[0]; 639 info->nz_allocated = irecv[1]; 640 info->nz_unneeded = irecv[2]; 641 info->memory = irecv[3]; 642 info->mallocs = irecv[4]; 643 } else if (flag == MAT_GLOBAL_SUM) { 644 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 645 info->nz_used = irecv[0]; 646 info->nz_allocated = irecv[1]; 647 info->nz_unneeded = irecv[2]; 648 info->memory = irecv[3]; 649 info->mallocs = irecv[4]; 650 } 651 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 652 info->fill_ratio_needed = 0; 653 info->factor_mallocs = 0; 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatSetOption_MPIDense" 659 int MatSetOption_MPIDense(Mat A,MatOption op) 660 { 661 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 662 int ierr; 663 664 PetscFunctionBegin; 665 switch (op) { 666 case MAT_NO_NEW_NONZERO_LOCATIONS: 667 case MAT_YES_NEW_NONZERO_LOCATIONS: 668 case MAT_NEW_NONZERO_LOCATION_ERR: 669 case MAT_NEW_NONZERO_ALLOCATION_ERR: 670 case MAT_COLUMNS_SORTED: 671 case MAT_COLUMNS_UNSORTED: 672 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 673 break; 674 case MAT_ROW_ORIENTED: 675 a->roworiented = PETSC_TRUE; 676 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 677 break; 678 case MAT_ROWS_SORTED: 679 case MAT_ROWS_UNSORTED: 680 case MAT_YES_NEW_DIAGONALS: 681 case MAT_USE_HASH_TABLE: 682 case MAT_USE_SINGLE_PRECISION_SOLVES: 683 PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 684 break; 685 case MAT_COLUMN_ORIENTED: 686 a->roworiented = PETSC_FALSE; 687 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 688 break; 689 case MAT_IGNORE_OFF_PROC_ENTRIES: 690 a->donotstash = PETSC_TRUE; 691 break; 692 case MAT_NO_NEW_DIAGONALS: 693 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 694 break; 695 default: 696 SETERRQ(PETSC_ERR_SUP,"unknown option"); 697 break; 698 } 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatGetRow_MPIDense" 704 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v) 705 { 706 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 707 int lrow,rstart = mat->rstart,rend = mat->rend,ierr; 708 709 PetscFunctionBegin; 710 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 711 lrow = row - rstart; 712 ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatRestoreRow_MPIDense" 718 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 719 { 720 int ierr; 721 722 PetscFunctionBegin; 723 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 724 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatDiagonalScale_MPIDense" 730 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 731 { 732 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 733 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 734 PetscScalar *l,*r,x,*v; 735 int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n; 736 737 PetscFunctionBegin; 738 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 739 if (ll) { 740 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 741 if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size"); 742 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 743 for (i=0; i<m; i++) { 744 x = l[i]; 745 v = mat->v + i; 746 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 747 } 748 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 749 PetscLogFlops(n*m); 750 } 751 if (rr) { 752 ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 753 if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size"); 754 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 755 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 756 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 757 for (i=0; i<n; i++) { 758 x = r[i]; 759 v = mat->v + i*m; 760 for (j=0; j<m; j++) { (*v++) *= x;} 761 } 762 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 763 PetscLogFlops(n*m); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatNorm_MPIDense" 770 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm) 771 { 772 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 773 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 774 int ierr,i,j; 775 PetscReal sum = 0.0; 776 PetscScalar *v = mat->v; 777 778 PetscFunctionBegin; 779 if (mdn->size == 1) { 780 ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr); 781 } else { 782 if (type == NORM_FROBENIUS) { 783 for (i=0; i<mdn->A->n*mdn->A->m; i++) { 784 #if defined(PETSC_USE_COMPLEX) 785 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 786 #else 787 sum += (*v)*(*v); v++; 788 #endif 789 } 790 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 791 *norm = sqrt(*norm); 792 PetscLogFlops(2*mdn->A->n*mdn->A->m); 793 } else if (type == NORM_1) { 794 PetscReal *tmp,*tmp2; 795 ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 796 tmp2 = tmp + A->N; 797 ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr); 798 *norm = 0.0; 799 v = mat->v; 800 for (j=0; j<mdn->A->n; j++) { 801 for (i=0; i<mdn->A->m; i++) { 802 tmp[j] += PetscAbsScalar(*v); v++; 803 } 804 } 805 ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 806 for (j=0; j<A->N; j++) { 807 if (tmp2[j] > *norm) *norm = tmp2[j]; 808 } 809 ierr = PetscFree(tmp);CHKERRQ(ierr); 810 PetscLogFlops(A->n*A->m); 811 } else if (type == NORM_INFINITY) { /* max row norm */ 812 PetscReal ntemp; 813 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 814 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 815 } else { 816 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 817 } 818 } 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatTranspose_MPIDense" 824 int MatTranspose_MPIDense(Mat A,Mat *matout) 825 { 826 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 827 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 828 Mat B; 829 int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart; 830 int j,i,ierr; 831 PetscScalar *v; 832 833 PetscFunctionBegin; 834 if (!matout && M != N) { 835 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 836 } 837 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 838 839 m = a->A->m; n = a->A->n; v = Aloc->v; 840 ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr); 841 for (j=0; j<n; j++) { 842 for (i=0; i<m; i++) rwork[i] = rstart + i; 843 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 844 v += m; 845 } 846 ierr = PetscFree(rwork);CHKERRQ(ierr); 847 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 848 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 849 if (matout) { 850 *matout = B; 851 } else { 852 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 853 } 854 PetscFunctionReturn(0); 855 } 856 857 #include "petscblaslapack.h" 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatScale_MPIDense" 860 int MatScale_MPIDense(PetscScalar *alpha,Mat inA) 861 { 862 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 863 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 864 int one = 1,nz; 865 866 PetscFunctionBegin; 867 nz = inA->m*inA->N; 868 BLscal_(&nz,alpha,a->v,&one); 869 PetscLogFlops(nz); 870 PetscFunctionReturn(0); 871 } 872 873 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 874 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **); 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 878 int MatSetUpPreallocation_MPIDense(Mat A) 879 { 880 int ierr; 881 882 PetscFunctionBegin; 883 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 884 PetscFunctionReturn(0); 885 } 886 887 /* -------------------------------------------------------------------*/ 888 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 889 MatGetRow_MPIDense, 890 MatRestoreRow_MPIDense, 891 MatMult_MPIDense, 892 MatMultAdd_MPIDense, 893 MatMultTranspose_MPIDense, 894 MatMultTransposeAdd_MPIDense, 895 0, 896 0, 897 0, 898 0, 899 0, 900 0, 901 0, 902 MatTranspose_MPIDense, 903 MatGetInfo_MPIDense,0, 904 MatGetDiagonal_MPIDense, 905 MatDiagonalScale_MPIDense, 906 MatNorm_MPIDense, 907 MatAssemblyBegin_MPIDense, 908 MatAssemblyEnd_MPIDense, 909 0, 910 MatSetOption_MPIDense, 911 MatZeroEntries_MPIDense, 912 MatZeroRows_MPIDense, 913 0, 914 0, 915 0, 916 0, 917 MatSetUpPreallocation_MPIDense, 918 0, 919 0, 920 MatGetArray_MPIDense, 921 MatRestoreArray_MPIDense, 922 MatDuplicate_MPIDense, 923 0, 924 0, 925 0, 926 0, 927 0, 928 MatGetSubMatrices_MPIDense, 929 0, 930 MatGetValues_MPIDense, 931 0, 932 0, 933 MatScale_MPIDense, 934 0, 935 0, 936 0, 937 MatGetBlockSize_MPIDense, 938 0, 939 0, 940 0, 941 0, 942 0, 943 0, 944 0, 945 0, 946 0, 947 MatGetSubMatrix_MPIDense, 948 MatDestroy_MPIDense, 949 MatView_MPIDense, 950 MatGetPetscMaps_Petsc}; 951 952 EXTERN_C_BEGIN 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatCreate_MPIDense" 955 int MatCreate_MPIDense(Mat mat) 956 { 957 Mat_MPIDense *a; 958 int ierr,i; 959 960 PetscFunctionBegin; 961 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 962 mat->data = (void*)a; 963 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 964 mat->factor = 0; 965 mat->mapping = 0; 966 967 a->factor = 0; 968 mat->insertmode = NOT_SET_VALUES; 969 ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr); 970 ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr); 971 972 ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr); 973 ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr); 974 a->nvec = mat->n; 975 976 /* the information in the maps duplicates the information computed below, eventually 977 we should remove the duplicate information that is not contained in the maps */ 978 ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr); 979 ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr); 980 981 /* build local table of row and column ownerships */ 982 ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 983 a->cowners = a->rowners + a->size + 1; 984 PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 985 ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 986 a->rowners[0] = 0; 987 for (i=2; i<=a->size; i++) { 988 a->rowners[i] += a->rowners[i-1]; 989 } 990 a->rstart = a->rowners[a->rank]; 991 a->rend = a->rowners[a->rank+1]; 992 ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 993 a->cowners[0] = 0; 994 for (i=2; i<=a->size; i++) { 995 a->cowners[i] += a->cowners[i-1]; 996 } 997 998 /* build cache for off array entries formed */ 999 a->donotstash = PETSC_FALSE; 1000 ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr); 1001 1002 /* stuff used for matrix vector multiply */ 1003 a->lvec = 0; 1004 a->Mvctx = 0; 1005 a->roworiented = PETSC_TRUE; 1006 1007 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1008 "MatGetDiagonalBlock_MPIDense", 1009 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 EXTERN_C_END 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1016 /*@C 1017 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1018 1019 Not collective 1020 1021 Input Parameters: 1022 . A - the matrix 1023 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1024 to control all matrix memory allocation. 1025 1026 Notes: 1027 The dense format is fully compatible with standard Fortran 77 1028 storage by columns. 1029 1030 The data input variable is intended primarily for Fortran programmers 1031 who wish to allocate their own matrix memory space. Most users should 1032 set data=PETSC_NULL. 1033 1034 Level: intermediate 1035 1036 .keywords: matrix,dense, parallel 1037 1038 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1039 @*/ 1040 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1041 { 1042 Mat_MPIDense *a; 1043 int ierr; 1044 PetscTruth flg2; 1045 1046 PetscFunctionBegin; 1047 ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr); 1048 if (!flg2) PetscFunctionReturn(0); 1049 mat->preallocated = PETSC_TRUE; 1050 /* Note: For now, when data is specified above, this assumes the user correctly 1051 allocates the local dense storage space. We should add error checking. */ 1052 1053 a = (Mat_MPIDense*)mat->data; 1054 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr); 1055 PetscLogObjectParent(mat,a->A); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "MatCreateMPIDense" 1061 /*@C 1062 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1063 1064 Collective on MPI_Comm 1065 1066 Input Parameters: 1067 + comm - MPI communicator 1068 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1069 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1070 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1071 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1072 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1073 to control all matrix memory allocation. 1074 1075 Output Parameter: 1076 . A - the matrix 1077 1078 Notes: 1079 The dense format is fully compatible with standard Fortran 77 1080 storage by columns. 1081 1082 The data input variable is intended primarily for Fortran programmers 1083 who wish to allocate their own matrix memory space. Most users should 1084 set data=PETSC_NULL. 1085 1086 The user MUST specify either the local or global matrix dimensions 1087 (possibly both). 1088 1089 Level: intermediate 1090 1091 .keywords: matrix,dense, parallel 1092 1093 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1094 @*/ 1095 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A) 1096 { 1097 int ierr,size; 1098 1099 PetscFunctionBegin; 1100 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1101 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1102 if (size > 1) { 1103 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1104 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1105 } else { 1106 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1107 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1108 } 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatDuplicate_MPIDense" 1114 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1115 { 1116 Mat mat; 1117 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1118 int ierr; 1119 1120 PetscFunctionBegin; 1121 *newmat = 0; 1122 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr); 1123 ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr); 1124 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1125 mat->data = (void*)a; 1126 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1127 mat->factor = A->factor; 1128 mat->assembled = PETSC_TRUE; 1129 mat->preallocated = PETSC_TRUE; 1130 1131 a->rstart = oldmat->rstart; 1132 a->rend = oldmat->rend; 1133 a->size = oldmat->size; 1134 a->rank = oldmat->rank; 1135 mat->insertmode = NOT_SET_VALUES; 1136 a->nvec = oldmat->nvec; 1137 a->donotstash = oldmat->donotstash; 1138 ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1139 PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1140 ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 1141 ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 1142 1143 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1144 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1145 PetscLogObjectParent(mat,a->A); 1146 *newmat = mat; 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #include "petscsys.h" 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1154 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat) 1155 { 1156 int *rowners,i,size,rank,m,ierr,nz,j; 1157 PetscScalar *array,*vals,*vals_ptr; 1158 MPI_Status status; 1159 1160 PetscFunctionBegin; 1161 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1162 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1163 1164 /* determine ownership of all rows */ 1165 m = M/size + ((M % size) > rank); 1166 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1167 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1168 rowners[0] = 0; 1169 for (i=2; i<=size; i++) { 1170 rowners[i] += rowners[i-1]; 1171 } 1172 1173 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1174 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1175 1176 if (!rank) { 1177 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1178 1179 /* read in my part of the matrix numerical values */ 1180 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1181 1182 /* insert into matrix-by row (this is why cannot directly read into array */ 1183 vals_ptr = vals; 1184 for (i=0; i<m; i++) { 1185 for (j=0; j<N; j++) { 1186 array[i + j*m] = *vals_ptr++; 1187 } 1188 } 1189 1190 /* read in other processors and ship out */ 1191 for (i=1; i<size; i++) { 1192 nz = (rowners[i+1] - rowners[i])*N; 1193 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1194 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 1195 } 1196 } else { 1197 /* receive numeric values */ 1198 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1199 1200 /* receive message of values*/ 1201 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 1202 1203 /* insert into matrix-by row (this is why cannot directly read into array */ 1204 vals_ptr = vals; 1205 for (i=0; i<m; i++) { 1206 for (j=0; j<N; j++) { 1207 array[i + j*m] = *vals_ptr++; 1208 } 1209 } 1210 } 1211 ierr = PetscFree(rowners);CHKERRQ(ierr); 1212 ierr = PetscFree(vals);CHKERRQ(ierr); 1213 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1214 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 EXTERN_C_BEGIN 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatLoad_MPIDense" 1221 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat) 1222 { 1223 Mat A; 1224 PetscScalar *vals,*svals; 1225 MPI_Comm comm = ((PetscObject)viewer)->comm; 1226 MPI_Status status; 1227 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1228 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1229 int tag = ((PetscObject)viewer)->tag; 1230 int i,nz,ierr,j,rstart,rend,fd; 1231 1232 PetscFunctionBegin; 1233 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1234 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1235 if (!rank) { 1236 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1237 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1238 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1239 } 1240 1241 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1242 M = header[1]; N = header[2]; nz = header[3]; 1243 1244 /* 1245 Handle case where matrix is stored on disk as a dense matrix 1246 */ 1247 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1248 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 /* determine ownership of all rows */ 1253 m = M/size + ((M % size) > rank); 1254 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1255 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1256 rowners[0] = 0; 1257 for (i=2; i<=size; i++) { 1258 rowners[i] += rowners[i-1]; 1259 } 1260 rstart = rowners[rank]; 1261 rend = rowners[rank+1]; 1262 1263 /* distribute row lengths to all processors */ 1264 ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr); 1265 offlens = ourlens + (rend-rstart); 1266 if (!rank) { 1267 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1268 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1269 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1270 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1271 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1272 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1273 } else { 1274 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1275 } 1276 1277 if (!rank) { 1278 /* calculate the number of nonzeros on each processor */ 1279 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1280 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1281 for (i=0; i<size; i++) { 1282 for (j=rowners[i]; j< rowners[i+1]; j++) { 1283 procsnz[i] += rowlengths[j]; 1284 } 1285 } 1286 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1287 1288 /* determine max buffer needed and allocate it */ 1289 maxnz = 0; 1290 for (i=0; i<size; i++) { 1291 maxnz = PetscMax(maxnz,procsnz[i]); 1292 } 1293 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1294 1295 /* read in my part of the matrix column indices */ 1296 nz = procsnz[0]; 1297 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1298 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1299 1300 /* read in every one elses and ship off */ 1301 for (i=1; i<size; i++) { 1302 nz = procsnz[i]; 1303 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1304 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1305 } 1306 ierr = PetscFree(cols);CHKERRQ(ierr); 1307 } else { 1308 /* determine buffer space needed for message */ 1309 nz = 0; 1310 for (i=0; i<m; i++) { 1311 nz += ourlens[i]; 1312 } 1313 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1314 1315 /* receive message of column indices*/ 1316 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1317 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1318 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1319 } 1320 1321 /* loop over local rows, determining number of off diagonal entries */ 1322 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1323 jj = 0; 1324 for (i=0; i<m; i++) { 1325 for (j=0; j<ourlens[i]; j++) { 1326 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1327 jj++; 1328 } 1329 } 1330 1331 /* create our matrix */ 1332 for (i=0; i<m; i++) { 1333 ourlens[i] -= offlens[i]; 1334 } 1335 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1336 A = *newmat; 1337 for (i=0; i<m; i++) { 1338 ourlens[i] += offlens[i]; 1339 } 1340 1341 if (!rank) { 1342 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1343 1344 /* read in my part of the matrix numerical values */ 1345 nz = procsnz[0]; 1346 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1347 1348 /* insert into matrix */ 1349 jj = rstart; 1350 smycols = mycols; 1351 svals = vals; 1352 for (i=0; i<m; i++) { 1353 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1354 smycols += ourlens[i]; 1355 svals += ourlens[i]; 1356 jj++; 1357 } 1358 1359 /* read in other processors and ship out */ 1360 for (i=1; i<size; i++) { 1361 nz = procsnz[i]; 1362 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1363 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1364 } 1365 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1366 } else { 1367 /* receive numeric values */ 1368 ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1369 1370 /* receive message of values*/ 1371 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1372 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1373 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1374 1375 /* insert into matrix */ 1376 jj = rstart; 1377 smycols = mycols; 1378 svals = vals; 1379 for (i=0; i<m; i++) { 1380 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1381 smycols += ourlens[i]; 1382 svals += ourlens[i]; 1383 jj++; 1384 } 1385 } 1386 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1387 ierr = PetscFree(vals);CHKERRQ(ierr); 1388 ierr = PetscFree(mycols);CHKERRQ(ierr); 1389 ierr = PetscFree(rowners);CHKERRQ(ierr); 1390 1391 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 EXTERN_C_END 1396 1397 1398 1399 1400 1401